mirror of
https://github.com/reactos/reactos.git
synced 2025-06-17 01:08:31 +00:00
[ASMPP] Implement asm preprocessor
This converts ML style assembly to GAS compatible syntax
This commit is contained in:
parent
7277e26944
commit
61cc62d1b2
10 changed files with 1700 additions and 18 deletions
|
@ -153,7 +153,7 @@ if(NOT CMAKE_CROSSCOMPILING)
|
||||||
add_subdirectory(sdk/tools)
|
add_subdirectory(sdk/tools)
|
||||||
add_subdirectory(sdk/lib)
|
add_subdirectory(sdk/lib)
|
||||||
|
|
||||||
set(NATIVE_TARGETS bin2c widl gendib cabman fatten hpp isohybrid mkhive mkisofs obj2bin spec2def geninc mkshelllink utf16le xml2sdb)
|
set(NATIVE_TARGETS asmpp bin2c widl gendib cabman fatten hpp isohybrid mkhive mkisofs obj2bin spec2def geninc mkshelllink utf16le xml2sdb)
|
||||||
if(NOT MSVC)
|
if(NOT MSVC)
|
||||||
list(APPEND NATIVE_TARGETS rsym pefixup)
|
list(APPEND NATIVE_TARGETS rsym pefixup)
|
||||||
endif()
|
endif()
|
||||||
|
|
|
@ -467,8 +467,37 @@ function(allow_warnings __module)
|
||||||
#target_compile_options(${__module} PRIVATE "-Wno-error")
|
#target_compile_options(${__module} PRIVATE "-Wno-error")
|
||||||
endfunction()
|
endfunction()
|
||||||
|
|
||||||
|
# convert_asm_file(<source> <target>)
#
# Adds a custom command that runs the native asmpp host tool on <source>
# (resolved to an absolute path) and redirects the tool's standard output to
# ${CMAKE_CURRENT_BINARY_DIR}/<target>. The command depends on both the tool
# and the source file, so the output is regenerated when either changes.
function(convert_asm_file _source_file _target_file)
    get_filename_component(_source_file_full_path ${_source_file} ABSOLUTE)
    set(_preprocessed_asm_file ${CMAKE_CURRENT_BINARY_DIR}/${_target_file})

    # <target> may contain sub-directories (e.g. "i386/foo.asm.s"). The shell
    # redirection below cannot create them, so make sure they exist up front.
    get_filename_component(_preprocessed_asm_dir ${_preprocessed_asm_file} DIRECTORY)
    file(MAKE_DIRECTORY ${_preprocessed_asm_dir})

    add_custom_command(
        OUTPUT ${_preprocessed_asm_file}
        COMMAND native-asmpp ${_source_file_full_path} > ${_preprocessed_asm_file}
        DEPENDS native-asmpp ${_source_file_full_path})
endfunction()
|
||||||
|
|
||||||
|
# convert_asm_files(<source>...)
#
# Calls convert_asm_file() once for every argument. The output of each call
# is named "<source>.s".
function(convert_asm_files)
    foreach(_asm_source ${ARGN})
        set(_asm_output ${_asm_source}.s)
        convert_asm_file(${_asm_source} ${_asm_output})
    endforeach()
endfunction()
|
||||||
|
|
||||||
# add_asm_files(<list-var> <source>...)
#
# Appends the given sources to the list variable <list-var>.
#   - "*.asm" files are passed to convert_asm_file() and the generated
#     "<source>.s" in the current binary directory is appended instead.
#   - "*.inc" files are handled the same way, producing "<source>.h".
#   - Every other file is appended unchanged.
macro(add_asm_files _target)
    foreach(_source_file ${ARGN})
        get_filename_component(_extension ${_source_file} EXT)
        # Not used below. NOTE(review): this is a macro, so the variable is
        # visible in the calling scope; it is kept in case a caller reads it.
        get_filename_component(_source_file_base_name ${_source_file} NAME_WE)
        # The expansion is quoted on purpose: EXT yields an empty string for a
        # file without an extension, and an unquoted empty operand would make
        # the if() below a syntax error.
        if("${_extension}" STREQUAL ".asm")
            convert_asm_file(${_source_file} ${_source_file}.s)
            list(APPEND ${_target} ${CMAKE_CURRENT_BINARY_DIR}/${_source_file}.s)
        elseif("${_extension}" STREQUAL ".inc")
            convert_asm_file(${_source_file} ${_source_file}.h)
            list(APPEND ${_target} ${CMAKE_CURRENT_BINARY_DIR}/${_source_file}.h)
        else()
            list(APPEND ${_target} ${_source_file})
        endif()
    endforeach()
endmacro()
|
||||||
|
|
||||||
function(add_linker_script _target _linker_script_file)
|
function(add_linker_script _target _linker_script_file)
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
include(ExternalProject)
|
include(ExternalProject)
|
||||||
|
|
||||||
function(setup_host_tools)
|
function(setup_host_tools)
|
||||||
list(APPEND HOST_TOOLS bin2c widl gendib cabman fatten hpp isohybrid mkhive mkisofs obj2bin spec2def geninc mkshelllink txt2nls utf16le xml2sdb)
|
list(APPEND HOST_TOOLS asmpp bin2c widl gendib cabman fatten hpp isohybrid mkhive mkisofs obj2bin spec2def geninc mkshelllink txt2nls utf16le xml2sdb)
|
||||||
if(NOT MSVC)
|
if(NOT MSVC)
|
||||||
list(APPEND HOST_TOOLS rsym pefixup)
|
list(APPEND HOST_TOOLS rsym pefixup)
|
||||||
endif()
|
endif()
|
||||||
|
|
|
@ -475,6 +475,10 @@ macro(add_asm_files _target)
|
||||||
get_includes(_directory_includes)
|
get_includes(_directory_includes)
|
||||||
get_directory_property(_defines COMPILE_DEFINITIONS)
|
get_directory_property(_defines COMPILE_DEFINITIONS)
|
||||||
foreach(_source_file ${ARGN})
|
foreach(_source_file ${ARGN})
|
||||||
|
get_filename_component(_extension ${_source_file} EXT)
|
||||||
|
if (("${_extension}" STREQUAL ".asm") OR ("${_extension}" STREQUAL ".inc"))
|
||||||
|
list(APPEND ${_target} ${_source_file})
|
||||||
|
else()
|
||||||
get_filename_component(_source_file_base_name ${_source_file} NAME_WE)
|
get_filename_component(_source_file_base_name ${_source_file} NAME_WE)
|
||||||
get_filename_component(_source_file_full_path ${_source_file} ABSOLUTE)
|
get_filename_component(_source_file_full_path ${_source_file} ABSOLUTE)
|
||||||
set(_preprocessed_asm_file ${CMAKE_CURRENT_BINARY_DIR}/asm/${_source_file_base_name}_${_target}.asm)
|
set(_preprocessed_asm_file ${CMAKE_CURRENT_BINARY_DIR}/asm/${_source_file_base_name}_${_target}.asm)
|
||||||
|
@ -490,6 +494,7 @@ macro(add_asm_files _target)
|
||||||
COMMAND cl /nologo /X /I${REACTOS_SOURCE_DIR}/sdk/include/asm /I${REACTOS_BINARY_DIR}/sdk/include/asm ${_directory_includes} ${_source_file_defines} ${_directory_defines} /D__ASM__ /D_USE_ML /EP /c ${_source_file_full_path} > ${_preprocessed_asm_file}
|
COMMAND cl /nologo /X /I${REACTOS_SOURCE_DIR}/sdk/include/asm /I${REACTOS_BINARY_DIR}/sdk/include/asm ${_directory_includes} ${_source_file_defines} ${_directory_defines} /D__ASM__ /D_USE_ML /EP /c ${_source_file_full_path} > ${_preprocessed_asm_file}
|
||||||
DEPENDS ${_source_file_full_path})
|
DEPENDS ${_source_file_full_path})
|
||||||
list(APPEND ${_target} ${_preprocessed_asm_file})
|
list(APPEND ${_target} ${_preprocessed_asm_file})
|
||||||
|
endif()
|
||||||
endforeach()
|
endforeach()
|
||||||
endmacro()
|
endmacro()
|
||||||
|
|
||||||
|
|
|
@ -27,6 +27,7 @@ target_link_libraries(obj2bin PRIVATE host_includes)
|
||||||
add_host_tool(spec2def spec2def/spec2def.c)
|
add_host_tool(spec2def spec2def/spec2def.c)
|
||||||
add_host_tool(utf16le utf16le/utf16le.cpp)
|
add_host_tool(utf16le utf16le/utf16le.cpp)
|
||||||
|
|
||||||
|
add_subdirectory(asmpp)
|
||||||
add_subdirectory(cabman)
|
add_subdirectory(cabman)
|
||||||
add_subdirectory(fatten)
|
add_subdirectory(fatten)
|
||||||
add_subdirectory(hhpcomp)
|
add_subdirectory(hhpcomp)
|
||||||
|
|
8
sdk/tools/asmpp/CMakeLists.txt
Normal file
8
sdk/tools/asmpp/CMakeLists.txt
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
|
||||||
|
list(APPEND SOURCE
|
||||||
|
asmpp.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
add_host_tool(asmpp ${SOURCE})
|
||||||
|
target_link_libraries(asmpp PRIVATE host_includes)
|
||||||
|
set_property(TARGET asmpp PROPERTY CXX_STANDARD 11)
|
1208
sdk/tools/asmpp/asmpp.cpp
Normal file
1208
sdk/tools/asmpp/asmpp.cpp
Normal file
File diff suppressed because it is too large
Load diff
31
sdk/tools/asmpp/asmpp.sln
Normal file
31
sdk/tools/asmpp/asmpp.sln
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
|
||||||
|
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||||
|
# Visual Studio Version 16
|
||||||
|
VisualStudioVersion = 16.0.32510.428
|
||||||
|
MinimumVisualStudioVersion = 10.0.40219.1
|
||||||
|
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "asmpp", "asmpp.vcxproj", "{A1F7C9EE-4F70-43CD-A0BE-85D137B80583}"
|
||||||
|
EndProject
|
||||||
|
Global
|
||||||
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
|
Debug|x64 = Debug|x64
|
||||||
|
Debug|x86 = Debug|x86
|
||||||
|
Release|x64 = Release|x64
|
||||||
|
Release|x86 = Release|x86
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||||
|
{A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Debug|x64.ActiveCfg = Debug|x64
|
||||||
|
{A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Debug|x64.Build.0 = Debug|x64
|
||||||
|
{A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Debug|x86.ActiveCfg = Debug|Win32
|
||||||
|
{A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Debug|x86.Build.0 = Debug|Win32
|
||||||
|
{A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Release|x64.ActiveCfg = Release|x64
|
||||||
|
{A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Release|x64.Build.0 = Release|x64
|
||||||
|
{A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Release|x86.ActiveCfg = Release|Win32
|
||||||
|
{A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Release|x86.Build.0 = Release|Win32
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
|
HideSolutionNode = FALSE
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||||
|
SolutionGuid = {16936D9E-6E98-4126-8918-03218BC19061}
|
||||||
|
EndGlobalSection
|
||||||
|
EndGlobal
|
151
sdk/tools/asmpp/asmpp.vcxproj
Normal file
151
sdk/tools/asmpp/asmpp.vcxproj
Normal file
|
@ -0,0 +1,151 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|Win32">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Debug|x64">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|x64">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
</ItemGroup>
|
||||||
|
<PropertyGroup Label="Globals">
|
||||||
|
<VCProjectVersion>16.0</VCProjectVersion>
|
||||||
|
<Keyword>Win32Proj</Keyword>
|
||||||
|
<ProjectGuid>{a1f7c9ee-4f70-43cd-a0be-85d137b80583}</ProjectGuid>
|
||||||
|
<RootNamespace>asmpp2</RootNamespace>
|
||||||
|
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||||
|
</PropertyGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>true</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v142</PlatformToolset>
|
||||||
|
<CharacterSet>Unicode</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>false</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v142</PlatformToolset>
|
||||||
|
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||||
|
<CharacterSet>Unicode</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>true</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v142</PlatformToolset>
|
||||||
|
<CharacterSet>Unicode</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>false</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v142</PlatformToolset>
|
||||||
|
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||||
|
<CharacterSet>Unicode</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
|
<ImportGroup Label="ExtensionSettings">
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="Shared">
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<PropertyGroup Label="UserMacros" />
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||||
|
<LinkIncremental>true</LinkIncremental>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||||
|
<LinkIncremental>false</LinkIncremental>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||||
|
<LinkIncremental>true</LinkIncremental>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||||
|
<LinkIncremental>false</LinkIncremental>
|
||||||
|
</PropertyGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||||
|
<ClCompile>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<ConformanceMode>true</ConformanceMode>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<SubSystem>Console</SubSystem>
|
||||||
|
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||||
|
</Link>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||||
|
<ClCompile>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
|
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<ConformanceMode>true</ConformanceMode>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<SubSystem>Console</SubSystem>
|
||||||
|
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||||
|
<OptimizeReferences>true</OptimizeReferences>
|
||||||
|
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||||
|
</Link>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||||
|
<ClCompile>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<ConformanceMode>true</ConformanceMode>
|
||||||
|
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<SubSystem>Console</SubSystem>
|
||||||
|
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||||
|
</Link>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||||
|
<ClCompile>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
|
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<ConformanceMode>true</ConformanceMode>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<SubSystem>Console</SubSystem>
|
||||||
|
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||||
|
<OptimizeReferences>true</OptimizeReferences>
|
||||||
|
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||||
|
</Link>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ClCompile Include="asmpp.cpp" />
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ClInclude Include="tokenizer.hpp" />
|
||||||
|
</ItemGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
|
<ImportGroup Label="ExtensionTargets">
|
||||||
|
</ImportGroup>
|
||||||
|
</Project>
|
249
sdk/tools/asmpp/tokenizer.hpp
Normal file
249
sdk/tools/asmpp/tokenizer.hpp
Normal file
|
@ -0,0 +1,249 @@
|
||||||
|
/*
|
||||||
|
* PROJECT: ReactOS host tools
|
||||||
|
* LICENSE: MIT (https://spdx.org/licenses/MIT)
|
||||||
|
* PURPOSE: Tokenizer class implementation
|
||||||
|
* COPYRIGHT: Copyright 2021 Timo Kreuzer <timo.kreuzer@reactos.org>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <fstream>
|
||||||
|
#include <regex>
|
||||||
|
#include <ctime>
|
||||||
|
|
||||||
|
// Change the "#if 0" below to "#if 1" for easier debugging (breaks into the debugger before every throw)
|
||||||
|
#if 0
|
||||||
|
#define throw __debugbreak(); throw
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern time_t search_time;
|
||||||
|
|
||||||
|
// Describes one kind of token for the Tokenizer.
struct TOKEN_DEF
{
    // Value reported as the token type when RegExString is the alternative
    // that matched.
    int Type;

    // Regular expression for this kind of token. Tokenizer::CompileMultiRegex
    // throws unless it contains exactly one capture group.
    std::string RegExString;
};
|
||||||
|
|
||||||
|
// A typed slice (position + length) of a source string.
// Only a reference to the text is stored, so the referenced string has to
// stay alive for as long as the Token is used.
class Token
{
    const std::string& m_text;  // text the token points into (not owned)
    unsigned int m_pos;         // offset of the first character in m_text
    unsigned int m_len;         // number of characters
#if _DEBUG
    std::string m_dbgstr;       // copy of the token text, filled in by the constructor
#endif
    int m_type;                 // caller-defined token type

public:

    // pos and len are narrowed to unsigned int when stored.
    Token(const std::string& text, size_t pos, size_t len, int type)
        : m_text(text),
          m_pos{ static_cast<unsigned int>(pos) },
          m_len{ static_cast<unsigned int>(len) },
          m_type{ type }
    {
#if _DEBUG
        m_dbgstr = str();
#endif
    }

    // Returns the token type given to the constructor.
    int type() const
    {
        return m_type;
    }

    // Returns a copy of the characters covered by this token.
    std::string str() const
    {
        return std::string(m_text, m_pos, m_len);
    }
};
|
||||||
|
|
||||||
|
struct Tokenizer
|
||||||
|
{
|
||||||
|
const std::vector<TOKEN_DEF> &m_tokendefs;
|
||||||
|
const std::regex m_re;
|
||||||
|
|
||||||
|
typedef int myint;
|
||||||
|
|
||||||
|
static
|
||||||
|
unsigned int
|
||||||
|
count_captures(const std::string& exp)
|
||||||
|
{
|
||||||
|
bool in_char_group = false;
|
||||||
|
unsigned int count = 0;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < exp.size(); i++)
|
||||||
|
{
|
||||||
|
char c = exp[i];
|
||||||
|
|
||||||
|
// Skip escaped characters
|
||||||
|
if (c == '\\')
|
||||||
|
{
|
||||||
|
i++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (in_char_group)
|
||||||
|
{
|
||||||
|
if (c == ']')
|
||||||
|
{
|
||||||
|
in_char_group = false;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c == '[')
|
||||||
|
{
|
||||||
|
in_char_group = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c == '(')
|
||||||
|
{
|
||||||
|
if (exp[i + 1] != '?')
|
||||||
|
{
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
std::regex
|
||||||
|
CompileMultiRegex(const std::vector<TOKEN_DEF> &tokendefs)
|
||||||
|
{
|
||||||
|
std::string combinedString;
|
||||||
|
|
||||||
|
if (tokendefs.size() == 0)
|
||||||
|
{
|
||||||
|
return std::regex();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate all token definitions
|
||||||
|
for (auto def : tokendefs)
|
||||||
|
{
|
||||||
|
size_t found = -1;
|
||||||
|
|
||||||
|
// Count capture groups
|
||||||
|
unsigned int count = count_captures(def.RegExString);
|
||||||
|
if (count != 1)
|
||||||
|
{
|
||||||
|
throw "invalid count!\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Combine all expressions into one (one capture group for each)
|
||||||
|
combinedString = "(?:" + tokendefs[0].RegExString + ")";
|
||||||
|
for (size_t i = 1; i < tokendefs.size(); i++)
|
||||||
|
{
|
||||||
|
combinedString += "|(?:" + tokendefs[i].RegExString + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::regex(combinedString, std::regex_constants::icase);
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
struct TOKEN_REF
|
||||||
|
{
|
||||||
|
unsigned int pos;
|
||||||
|
unsigned int len;
|
||||||
|
int type;
|
||||||
|
};
|
||||||
|
|
||||||
|
Tokenizer(std::vector<TOKEN_DEF> &tokendefs)
|
||||||
|
: m_tokendefs(tokendefs),
|
||||||
|
m_re(CompileMultiRegex(tokendefs))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
TOKEN_REF match(std::smatch &matches, const std::string& str) const
|
||||||
|
{
|
||||||
|
return match(matches, str, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TOKEN_REF match(std::smatch &matches, const std::string &str, size_t startpos) const
|
||||||
|
{
|
||||||
|
const std::string::const_iterator first = str.cbegin() + startpos;
|
||||||
|
const std::string::const_iterator last = str.cend();
|
||||||
|
|
||||||
|
// If we reached the end, there is nothing more to do
|
||||||
|
if (first == last)
|
||||||
|
{
|
||||||
|
return TOKEN_REF{ static_cast<unsigned int>(startpos), 0, -1 };
|
||||||
|
}
|
||||||
|
|
||||||
|
time_t start_time = time(NULL);
|
||||||
|
|
||||||
|
// Try to find a match
|
||||||
|
if (!std::regex_search(first, last, matches, m_re))
|
||||||
|
{
|
||||||
|
throw "Failed to match\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
search_time += time(NULL) - start_time;
|
||||||
|
|
||||||
|
// Validate that it's at the start of the string
|
||||||
|
if (matches.prefix().matched)
|
||||||
|
{
|
||||||
|
throw "Failed to match at current position!\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
// We have a match, check which one it is
|
||||||
|
for (size_t i = 1; i < matches.size(); i++)
|
||||||
|
{
|
||||||
|
if (matches[i].matched)
|
||||||
|
{
|
||||||
|
unsigned int len = static_cast<unsigned int>(matches.length(i));
|
||||||
|
int type = m_tokendefs[i - 1].Type;
|
||||||
|
return TOKEN_REF{ static_cast<unsigned int>(startpos), len, type};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We should never get here
|
||||||
|
throw "Something went wrong!\n";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class TokenList
|
||||||
|
{
|
||||||
|
using TOKEN_REF = typename Tokenizer::TOKEN_REF;
|
||||||
|
|
||||||
|
const Tokenizer& m_tokenizer;
|
||||||
|
const std::string& m_text;
|
||||||
|
std::vector<TOKEN_REF> m_tokens;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
TokenList(const Tokenizer& tokenizer, const std::string& text)
|
||||||
|
: m_tokenizer(tokenizer),
|
||||||
|
m_text(text)
|
||||||
|
{
|
||||||
|
size_t startpos = 0;
|
||||||
|
size_t len = m_text.size();
|
||||||
|
std::smatch matches;
|
||||||
|
|
||||||
|
m_tokens.reserve(len / 5);
|
||||||
|
|
||||||
|
while (startpos < len)
|
||||||
|
{
|
||||||
|
TOKEN_REF tref = m_tokenizer.match(matches, m_text, startpos);
|
||||||
|
m_tokens.push_back(tref);
|
||||||
|
startpos += tref.len;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t size() const
|
||||||
|
{
|
||||||
|
return m_tokens.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
Token operator[](size_t n) const
|
||||||
|
{
|
||||||
|
return Token(m_text, m_tokens[n].pos, m_tokens[n].len, m_tokens[n].type);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
Loading…
Add table
Add a link
Reference in a new issue