lots of stuff - tracking multiple headers, include dependencies, preprocessor logic, etc

svn path=/trunk/; revision=6542
This commit is contained in:
Royce Mitchell III 2003-11-06 05:55:26 +00:00
parent 46b4ed3668
commit 0fadf54b2f
10 changed files with 503 additions and 59 deletions

View file

@ -1,3 +1,4 @@
*.ncb
*.plg
Debug
Debug
*.opt

View file

@ -0,0 +1,26 @@
// Header.h
#ifndef HEADER_H
#define HEADER_H
#include "Symbol.h"
// Everything the importer records about one header file.
class Header
{
public:
	// Name the header was imported under (copied from the constructor argument).
	std::string filename;

	// Filled in by process_preprocessor(): names taken from #include
	// directives (split by is_libc_include) and whole #pragma lines.
	std::vector<std::string> includes;
	std::vector<std::string> libc_includes;
	std::vector<std::string> pragmas;

	// Symbols found in this header, in the order they were appended.
	std::vector<Symbol*> symbols;

	// done:    set once import_file() has finished with this header.
	// externc: set when an 'extern "C" {' opener has been seen.
	bool done;
	bool externc;

	// Parallel stacks describing the currently open #if/#ifdef/#ifndef
	// blocks: the stored condition text and the directive name.
	std::vector<std::string> ifs;
	std::vector<std::string> ifspreproc;

	Header ( const std::string& filename_ )
		: filename ( filename_ ),
		  done ( false ),
		  externc ( false )
	{
	}
};
#endif//HEADER_H

View file

@ -0,0 +1,18 @@
// Symbol.h
#ifndef SYMBOL_H
#define SYMBOL_H
#include "Type.h"
class Symbol
{
public:
Type type;
std::vector<std::string> names;
std::vector<std::string> dependencies;
std::vector<std::string> ifs;
std::string definition;
};
#endif//SYMBOL_H

View file

@ -0,0 +1,18 @@
// Type.h
#ifndef TYPE_H
#define TYPE_H
// Classification of a parsed element.  Values are spelled out explicitly;
// they are the same ones the implicit numbering produced.
typedef enum Type
{
	T_UNKNOWN      = -1,	// not (yet) classified
	T_TIDENT       = 0,	// 'typedef_tident(...)' element
	T_MACRO        = 1,
	T_DEFINE       = 2,	// #define
	T_VARIABLE     = 3,
	T_FUNCTION     = 4,
	T_FUNCTION_PTR = 5,
	T_STRUCT       = 6	// struct or union with a body
} Type;
#endif//TYPE_H

View file

@ -15,42 +15,122 @@ bool iskeyword ( const string& ident )
#define I(s) if ( ident == #s ) return true;
switch ( ident[0] )
{
	// Candidates are grouped by first character, so every I(...) entry must
	// live under the case that matches the first character of its spelling.
	case '_':
		I(__cdecl);
		I(__declspec);
		I(__except);
		I(__fastcall);
		I(__finally);
		I(__inline);
		I(__int8);
		I(__int16);
		I(__int32);
		I(__int64);
		I(__leave);
		I(__stdcall);
		I(__try);
		// Was listed under 'u', where it could never match because the
		// identifier starts with '_'.
		I(__uuidof);
		break;
	case 'b':
		I(bool);
		I(break);
		break;
	case 'c':
		I(case);
		I(catch);
		I(char);
		I(class);
		I(const);
		I(const_cast);
		I(continue);
		break;
	case 'd':
		I(default);
		I(delete);
		I(dllexport);
		I(dllimport);
		I(do);
		I(double);
		I(dynamic_cast);
		break;
	case 'e':
		I(else);
		I(enum);
		I(explicit);
		I(extern);
		break;
	case 'f':
		I(false);
		I(float);
		I(for);
		I(friend);
		break;
	case 'g':
		I(goto);
		break;
	case 'i':
		I(if);
		I(inline);
		I(int);
		break;
	case 'l':
		I(long);
		break;
	case 'm':
		I(mutable);
		break;
	case 'n':
		I(naked);
		I(namespace);
		I(new);
		I(noreturn);
		break;
	case 'o':
		I(operator);
		break;
	case 'p':
		I(private);
		I(protected);
		I(public);
		break;
	case 'r':
		I(register);
		I(reinterpret_cast);
		I(return);
		break;
	case 's':
		I(short);
		I(signed);
		I(sizeof);
		I(static);
		I(static_cast);
		I(struct);
		I(switch);
		break;
	case 't':
		I(template);
		I(this);
		I(thread);
		I(throw);
		I(true);
		I(try);
		I(typedef);
		I(typeid);
		I(typename);
		break;
	case 'u':
		I(union);
		I(unsigned);
		I(using);
		I(uuid);
		break;
	case 'v':
		I(virtual);
		I(void);
		I(volatile);
		break;
	case 'w':
		I(wmain);
		I(while);
		break;
}

View file

@ -4,10 +4,15 @@
#pragma warning ( disable : 4786 )
#endif//_MSC_VER
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <string>
#include <vector>
#include <conio.h>
#include "EnumFilesImpl.h"
#include "assert.h"
#include "File.h"
#include "binary2cstr.h"
@ -15,31 +20,28 @@
#include "tokenize.h"
#include "skip_ws.h"
#include "iskeyword.h"
#include "Type.h"
#include "Header.h"
using std::string;
using std::vector;
typedef enum
{
T_UNKNOWN = -1,
T_MACRO,
T_DEFINE,
T_VARIABLE,
T_FUNCTION,
T_FUNCTION_PTR,
T_STRUCT
} Type;
vector<Header*> headers;
bool import_file ( const char* filename );
char* findend ( char* p );
char* findend ( char* p, bool& externc );
Type identify ( const vector<string>& tokens, int off = 0 );
Type process ( const string& element, vector<string>& names, bool& isTypedef, vector<string>& dependencies );
void process_preprocessor ( const char* filename, Header& h, const string& element );
void process_c ( Header& h, const string& element );
int parse_type ( Type t, const vector<string>& tokens, int off, vector<string>& names, vector<string>& dependencies );
int parse_tident ( const vector<string>& tokens, int off, vector<string>& names, vector<string>& dependencies );
int parse_variable ( const vector<string>& tokens, int off, vector<string>& names, vector<string>& dependencies );
int parse_struct ( const vector<string>& tokens, int off, vector<string>& names, vector<string>& dependencies );
int parse_function ( const vector<string>& tokens, int off, vector<string>& names, vector<string>& dependencies );
int parse_function_ptr ( const vector<string>& tokens, int off, vector<string>& names, vector<string>& dependencies );
/*
#ifndef ASSERT
#define ASSERT(x) \
do \
@ -52,19 +54,50 @@ do \
} \
}while(0)
#endif//ASSERT
*/
// Returns true when 'inc' is, ignoring case, exactly one of the header names
// this tool treats as compiler/libc-provided: limits.h, stdarg.h, basetsd.h.
// The whole string is compared, so a name carrying a directory prefix does
// not match.
// NOTE(review): FileEnumProc forwards whatever EnumFilesInDirectory hands it;
// if that is a full path this test can never succeed there - confirm.
bool is_libc_include ( const std::string& inc )
{
	// Lower-case a private copy with the standard tolower() instead of the
	// non-standard strlwr(); this is also well defined for an empty name.
	std::string lowered ( inc );
	for ( std::string::size_type i = 0; i < lowered.size(); i++ )
		lowered[i] = static_cast<char> ( tolower ( static_cast<unsigned char> ( lowered[i] ) ) );

	static const char* const libc_headers[] = { "limits.h", "stdarg.h", "basetsd.h" };
	const int count = sizeof(libc_headers) / sizeof(libc_headers[0]);
	for ( int k = 0; k < count; k++ )
	{
		if ( lowered == libc_headers[k] )
			return true;
	}
	return false;
}
// Callback handed to EnumFilesInDirectory: imports every reported file
// unless is_libc_include() accepts its name.  Always returns TRUE.
// pwfd and lParam are not used.
BOOL FileEnumProc ( PWIN32_FIND_DATA pwfd, const char* filename, long lParam )
{
	const bool skip = is_libc_include ( filename );
	if ( skip )
		return TRUE;
	import_file ( filename );
	return TRUE;
}
// Program entry point: imports test.h, then runs EnumFilesInDirectory over
// the hard-coded ReactOS include directory with FileEnumProc as callback.
// Declared 'int' because standard C++ does not allow 'void main()'.
int main()
{
	import_file ( "test.h" );
	EnumFilesInDirectory ( "c:/cvs/reactos/include", "*.h", FileEnumProc, 0, TRUE, FALSE );
	return 0;
}
bool import_file ( const char* filename )
{
int i;
for ( i = 0; i < headers.size(); i++ )
{
if ( headers[i]->filename == filename )
return true;
}
printf ( "%s\n", filename );
string s;
if ( !File::LoadIntoString ( s, filename ) )
{
printf ( "Couldn't load \"%s\" for input.\n", filename );
return false;
exit(0);
}
// strip comments from the file...
@ -76,6 +109,9 @@ bool import_file ( const char* filename )
File::SaveFromString ( s, no_comments );
}*/
Header* h = new Header ( filename );
headers.push_back ( h );
char* p = &s[0];
while ( p )
{
@ -86,52 +122,227 @@ bool import_file ( const char* filename )
// check for pre-processor command
if ( *p == '#' )
{
p = strchr ( p, '\n' );
if ( p )
p++;
char* end = strchr ( p, '\n' );
if ( !end )
end = p + strlen(p);
string element ( p, end-p );
process_preprocessor ( filename, *h, element );
p = end;
}
else if ( *p == '}' && h->externc )
{
p++;
p = skip_ws ( p );
if ( *p == ';' ) p++;
}
else
{
char* end = findend ( p );
if ( !end )
end = p + strlen(p);
else if ( *end )
end++;
string element ( p, end-p );
bool externc = false;
char* end = findend ( p, externc );
if ( externc )
h->externc = true;
else
{
if ( !end )
end = p + strlen(p);
else if ( *end )
end++;
string element ( p, end-p );
process_c ( *h, element );
}
p = end;
printf ( "\"%s\"\n\n", binary2cstr(element).c_str() );
vector<string> names, dependencies;
bool isTypedef;
Type t = process ( element, names, isTypedef, dependencies );
printf ( "names: " );
if ( names.size() )
{
printf ( "%s", names[0].c_str() );
for ( int i = 1; i < names.size(); i++ )
printf ( ", %s", names[i].c_str() );
}
else
printf ( "(none)" );
printf ( "\n\n" );
printf ( "dependencies: " );
if ( dependencies.size() )
{
printf ( "%s", dependencies[0].c_str() );
for ( int i = 1; i < dependencies.size(); i++ )
printf ( ", %s", dependencies[i].c_str() );
}
else
printf ( "(none)" );
printf ( "\n\n" );
}
}
h->done = true;
return true;
}
// Derives the text expected inside a header's include guard from its file
// name: the directory part (either slash style) is dropped, everything from
// the last '.' of the remaining name onward is removed, and the result is
// upper-cased.  Example: "c:\inc\Header.h" -> "HEADER".
// A name without any '.' is returned upper-cased as a whole instead of
// walking off a null pointer.
std::string get_hdrguardtext ( const char* filename )
{
	const std::string path ( filename );

	// start of the base name: one past the last '/' or '\\', if any
	const std::string::size_type slash = path.find_last_of ( "/\\" );
	const std::string::size_type start = ( slash == std::string::npos ) ? 0 : slash + 1;

	// end of the stem: the last '.' that belongs to the base name
	std::string::size_type stop = path.rfind ( '.' );
	if ( stop == std::string::npos || stop < start )
		stop = path.size();

	std::string hdrguardtext ( path, start, stop - start );
	// portable replacement for the non-standard strupr()
	for ( std::string::size_type i = 0; i < hdrguardtext.size(); i++ )
		hdrguardtext[i] = static_cast<char> ( toupper ( static_cast<unsigned char> ( hdrguardtext[i] ) ) );
	return hdrguardtext;
}
void process_preprocessor ( const char* filename, Header& h, const string& element )
{
string hdrguardtext ( get_hdrguardtext ( filename ) );
const char* p = &element[0];
ASSERT ( *p == '#' );
p++;
p = skip_ws ( p );
const char* end = p;
while ( iscsym(*end) )
end++;
string preproc ( p, end-p );
p = end+1;
p = skip_ws ( p );
if ( preproc == "include" )
{
ASSERT ( *p == '<' || *p == '\"' );
p++;
p = skip_ws ( p );
const char* end = strpbrk ( p, ">\"" );
if ( !end )
end = p + strlen(p);
while ( end > p && isspace(end[-1]) )
end--;
string include_filename ( p, end-p );
if ( is_libc_include ( include_filename ) )
h.libc_includes.push_back ( include_filename );
else
{
bool loaded = false;
for ( int i = 0; i < headers.size(); i++ )
{
if ( headers[i]->filename == include_filename )
{
if ( !headers[i]->done )
{
printf ( "circular dependency between '%s' and '%s'\n", filename, include_filename.c_str() );
exit ( -1 );
}
loaded = true;
}
}
if ( !loaded )
import_file ( include_filename.c_str() );
h.includes.push_back ( include_filename );
}
}
else if ( preproc == "define" )
{
size_t len = element.size();
if ( strstr ( element.c_str(), hdrguardtext.c_str() )
&& element[len-2] == '_'
&& element[len-1] == 'H' )
{
// header include guard... ignore!
return;
}
Symbol *s = new Symbol;
s->type = T_DEFINE;
p += 6;
p = skip_ws ( p );
const char* end = p;
while ( iscsym(*end) )
end++;
s->names.push_back ( string(p,end-p) );
s->definition = element;
h.symbols.push_back ( s );
}
else if ( preproc == "if" || preproc == "ifdef" || preproc == "ifndef" )
{
size_t len = element.size();
// check for header include guard...
if ( strstr ( element.c_str(), hdrguardtext.c_str() )
&& element[len-2] == '_'
&& element[len-1] == 'H' )
h.ifs.push_back ( string("") );
else
h.ifs.push_back ( element );
h.ifspreproc.push_back ( preproc );
}
else if ( preproc == "endif" )
{
h.ifs.pop_back();
h.ifspreproc.pop_back();
}
else if ( preproc == "else" )
{
string& oldpre = h.ifspreproc.back();
ASSERT ( oldpre != "else" );
if ( oldpre == "ifdef" )
h.ifs.back() = "ifndef";
else if ( oldpre == "ifndef" )
h.ifs.back() = "ifdef";
else if ( oldpre == "if" )
h.ifs.back() = string("!(") + h.ifs.back() + ")";
else
{
printf ( "unrecognized preproc '%s'\n", oldpre.c_str() );
ASSERT(0);
return;
}
oldpre = "else";
}
else if ( preproc == "include_next" )
{
// we can safely ignore this command...
}
else if ( preproc == "pragma" )
{
h.pragmas.push_back ( element );
}
else
{
printf ( "process_preprocessor() choked on '%s'\n", preproc.c_str() );
}
}
// Turns one C-level element of header 'h' into a Symbol: the element text is
// kept as the definition, process() supplies type, names and dependencies,
// and every non-empty entry of h.ifs is copied onto the symbol before it is
// appended to h.symbols.
void process_c ( Header& h, const std::string& element )
{
	//printf ( "\"%s\"\n\n", binary2cstr(element).c_str() );

	bool isTypedef;
	Symbol* sym = new Symbol;
	sym->definition = element;
	sym->type = process ( element, sym->names, isTypedef, sym->dependencies );

	// empty entries are skipped
	for ( std::vector<std::string>::const_iterator cond = h.ifs.begin(); cond != h.ifs.end(); ++cond )
	{
		if ( !cond->empty() )
			sym->ifs.push_back ( *cond );
	}

	/* debug dump, disabled:
	printf ( "names: " );
	if ( sym->names.size() )
	{
		printf ( "%s", sym->names[0].c_str() );
		for ( int i = 1; i < sym->names.size(); i++ )
			printf ( ", %s", sym->names[i].c_str() );
	}
	else
		printf ( "(none)" );
	printf ( "\n\n" );

	printf ( "dependencies: " );
	if ( sym->dependencies.size() )
	{
		printf ( "%s", sym->dependencies[0].c_str() );
		for ( int i = 1; i < sym->dependencies.size(); i++ )
			printf ( ", %s", sym->dependencies[i].c_str() );
	}
	else
		printf ( "(none)" );
	printf ( "\n\n" );*/

	h.symbols.push_back ( sym );
}
char* skipsemi ( char* p )
{
if ( *p != '{' ) // }
@ -157,8 +368,32 @@ char* skipsemi ( char* p )
}
}
char* findend ( char* p )
char* findend ( char* p, bool& externc )
{
// special-case for 'extern "C"'
if ( !strncmp ( p, "extern", 6 ) )
{
char* p2 = p + 6;
p2 = skip_ws ( p2 );
if ( !strncmp ( p2, "\"C\"", 3 ) )
{
p2 += 3;
p2 = skip_ws ( p2 );
if ( *p2 == '{' )
{
externc = true;
return p2+1;
}
}
}
// special-case for 'typedef_tident'
if ( !strncmp ( p, "typedef_tident", 14 ) )
{
char* end = strchr ( p, ')' );
ASSERT(end);
return end;
}
externc = false;
for ( ;; )
{
char* end = strchr ( p, ';' );
@ -173,13 +408,25 @@ char* findend ( char* p )
Type identify ( const vector<string>& tokens, int off )
{
if ( tokens[0] == "typedef_tident" )
return T_TIDENT;
int parens = 0;
for ( int i = off; i < tokens.size(); i++ )
{
if ( tokens[i] == "(" )
parens++;
else if ( tokens[i] == "struct" && !parens )
return T_STRUCT;
else if ( (tokens[i] == "struct" || tokens[i] == "union") && !parens )
{
for ( int j = i + 1; j < tokens.size(); j++ )
{
if ( tokens[j] == "{" )
return T_STRUCT;
else if ( tokens[j] == ";" )
break;
}
}
else if ( tokens[i] == ";" )
break;
}
if ( parens > 1 )
return T_FUNCTION_PTR;
@ -217,6 +464,8 @@ int parse_type ( Type t, const vector<string>& tokens, int off, vector<string>&
{
switch ( t )
{
case T_TIDENT:
return parse_tident ( tokens, off, names, dependencies );
case T_VARIABLE:
return parse_variable ( tokens, off, names, dependencies );
case T_STRUCT:
@ -259,6 +508,15 @@ void depend ( const string& ident, vector<string>& dependencies )
dependencies.push_back ( ident );
}
// Parses the four-token form  typedef_tident ( NAME )  beginning at
// tokens[off].  NAME is appended to 'names', the literal "typedef_tident" is
// appended to 'dependencies', and the index just past the ')' is returned.
int parse_tident ( const std::vector<std::string>& tokens, int off, std::vector<std::string>& names, std::vector<std::string>& dependencies )
{
	ASSERT ( tokens[off] == "typedef_tident" );
	ASSERT ( tokens[off+1] == "(" && tokens[off+3] == ")" );

	const int name_index = off + 2;	// token between the parentheses
	const int next_index = off + 4;	// first token after ')'

	names.push_back ( tokens[name_index] );
	dependencies.push_back ( "typedef_tident" );
	return next_index;
}
int parse_variable ( const vector<string>& tokens, int off, vector<string>& names, vector<string>& dependencies )
{
// NOTE - Test with bitfields, I think this code will actually handle them properly...
@ -276,11 +534,14 @@ int parse_struct ( const vector<string>& tokens, int off, vector<string>& names,
{
int done = tokens.size();
while ( off < done && tokens[off] != "struct" )
//if ( tokens[off+1] == "_LARGE_INTEGER" )
// _CrtDbgBreak();
while ( off < done && tokens[off] != "struct" && tokens[off] != "union" )
depend ( tokens[off++], dependencies );
ASSERT ( tokens[off] == "struct" );
if ( tokens[off] != "struct" )
ASSERT ( tokens[off] == "struct" || tokens[off] == "union" );
if ( tokens[off] != "struct" && tokens[off] != "union" )
return off;
off++;
@ -292,11 +553,13 @@ int parse_struct ( const vector<string>& tokens, int off, vector<string>& names,
// skip through body of struct - noting any dependencies
int indent = 1;
while ( tokens[off] != "}" )
//if ( off >= done ) _CrtDbgBreak();
while ( off < done && tokens[off] != "}" )
{
vector<string> fauxnames;
Type t = identify ( tokens, off );
off = parse_type ( t, tokens, off, fauxnames, dependencies );
//if ( off >= done ) _CrtDbgBreak();
}
// process any trailing dependencies/names...

View file

@ -118,6 +118,10 @@ SOURCE=.\FixLFN.h
# End Source File
# Begin Source File
SOURCE=.\Header.h
# End Source File
# Begin Source File
SOURCE=.\iskeyword.cpp
# End Source File
# Begin Source File
@ -150,7 +154,15 @@ SOURCE=.\strip_comments.h
# End Source File
# Begin Source File
SOURCE=.\Symbol.h
# End Source File
# Begin Source File
SOURCE=.\tokenize.cpp
# End Source File
# Begin Source File
SOURCE=.\Type.h
# End Source File
# End Target
# End Project

View file

@ -10,3 +10,8 @@ char* skip_ws ( char* p )
{
return p + strspn ( p, ws );
}
// Const overload: returns the first position in 'p' whose character is not
// in the 'ws' set (or the terminating NUL when the whole rest is in the set).
const char* skip_ws ( const char* p )
{
	const size_t run = strspn ( p, ws );
	return p + run;
}

View file

@ -5,4 +5,6 @@
char* skip_ws ( char* );
const char* skip_ws ( const char* );
#endif//SKIP_WS_H

View file

@ -6,6 +6,7 @@
#include <string>
#include <vector>
#include <conio.h>
#include "assert.h"
#include "tokenize.h"
@ -122,6 +123,19 @@ void tokenize ( const string& text, vector<string>& tokens )
tokens.push_back ( ";" );
p++;
break;
case '=':
switch ( p[1] )
{
case '=':
tokens.push_back ( string ( p, 2 ) );
p += 2;
break;
default:
tokens.push_back ( "=" );
p++;
break;
}
break;
case ':':
switch ( p[1] )
{
@ -189,8 +203,13 @@ void tokenize ( const string& text, vector<string>& tokens )
break;
}
break;
case '#':
	// Skip the rest of a preprocessor line.  Also stop at the terminating
	// NUL so a '#' line that is not followed by a newline cannot run the
	// pointer off the end of the text.
	while ( *p && *p != '\n' )
		p++;
	break;
default:
printf ( "choked on '%c' in tokenize()\n", *p );
printf ( "choked on '%c' in tokenize() - press any key to continue\n", *p );
getch();
p++;
break;
}