From 61cc62d1b20842b4c612aa0c4a35982dd38ef6cc Mon Sep 17 00:00:00 2001 From: Timo Kreuzer Date: Mon, 20 Jun 2022 18:29:01 +0300 Subject: [PATCH] [ASMPP] Implement asm preprocessor This converts ML style assembly to GAS compatible syntax --- CMakeLists.txt | 2 +- sdk/cmake/gcc.cmake | 31 +- sdk/cmake/host-tools.cmake | 2 +- sdk/cmake/msvc.cmake | 35 +- sdk/tools/CMakeLists.txt | 1 + sdk/tools/asmpp/CMakeLists.txt | 8 + sdk/tools/asmpp/asmpp.cpp | 1208 ++++++++++++++++++++++++++++++++ sdk/tools/asmpp/asmpp.sln | 31 + sdk/tools/asmpp/asmpp.vcxproj | 151 ++++ sdk/tools/asmpp/tokenizer.hpp | 249 +++++++ 10 files changed, 1700 insertions(+), 18 deletions(-) create mode 100644 sdk/tools/asmpp/CMakeLists.txt create mode 100644 sdk/tools/asmpp/asmpp.cpp create mode 100644 sdk/tools/asmpp/asmpp.sln create mode 100644 sdk/tools/asmpp/asmpp.vcxproj create mode 100644 sdk/tools/asmpp/tokenizer.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 3b6cfae73a5..3a581a80ed2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -153,7 +153,7 @@ if(NOT CMAKE_CROSSCOMPILING) add_subdirectory(sdk/tools) add_subdirectory(sdk/lib) - set(NATIVE_TARGETS bin2c widl gendib cabman fatten hpp isohybrid mkhive mkisofs obj2bin spec2def geninc mkshelllink utf16le xml2sdb) + set(NATIVE_TARGETS asmpp bin2c widl gendib cabman fatten hpp isohybrid mkhive mkisofs obj2bin spec2def geninc mkshelllink utf16le xml2sdb) if(NOT MSVC) list(APPEND NATIVE_TARGETS rsym pefixup) endif() diff --git a/sdk/cmake/gcc.cmake b/sdk/cmake/gcc.cmake index bc72c977773..03b481b0140 100644 --- a/sdk/cmake/gcc.cmake +++ b/sdk/cmake/gcc.cmake @@ -467,8 +467,37 @@ function(allow_warnings __module) #target_compile_options(${__module} PRIVATE "-Wno-error") endfunction() +function(convert_asm_file _source_file _target_file) + get_filename_component(_source_file_base_name ${_source_file} NAME_WE) + get_filename_component(_source_file_full_path ${_source_file} ABSOLUTE) + set(_preprocessed_asm_file ${CMAKE_CURRENT_BINARY_DIR}/${_target_file}) + add_custom_command( + OUTPUT ${_preprocessed_asm_file} + COMMAND native-asmpp ${_source_file_full_path} > ${_preprocessed_asm_file} + DEPENDS native-asmpp ${_source_file_full_path}) + +endfunction() + +function(convert_asm_files) + foreach(_source_file ${ARGN}) + convert_asm_file(${_source_file} ${_source_file}.s) + endforeach() +endfunction() + macro(add_asm_files _target) - list(APPEND ${_target} ${ARGN}) + foreach(_source_file ${ARGN}) + get_filename_component(_extension ${_source_file} EXT) + get_filename_component(_source_file_base_name ${_source_file} NAME_WE) + if (${_extension} STREQUAL ".asm") + convert_asm_file(${_source_file} ${_source_file}.s) + list(APPEND ${_target} ${CMAKE_CURRENT_BINARY_DIR}/${_source_file}.s) + elseif (${_extension} STREQUAL ".inc") + convert_asm_file(${_source_file} ${_source_file}.h) + list(APPEND ${_target} ${CMAKE_CURRENT_BINARY_DIR}/${_source_file}.h) + else() + list(APPEND ${_target} ${_source_file}) + endif() + endforeach() endmacro() function(add_linker_script _target _linker_script_file) diff --git a/sdk/cmake/host-tools.cmake b/sdk/cmake/host-tools.cmake index a3e7947cd82..b7932a82bb8 100644 --- a/sdk/cmake/host-tools.cmake +++ b/sdk/cmake/host-tools.cmake @@ -2,7 +2,7 @@ include(ExternalProject) function(setup_host_tools) - list(APPEND HOST_TOOLS bin2c widl gendib cabman fatten hpp isohybrid mkhive mkisofs obj2bin spec2def geninc mkshelllink txt2nls utf16le xml2sdb) + list(APPEND HOST_TOOLS asmpp bin2c widl gendib cabman fatten hpp isohybrid mkhive mkisofs obj2bin spec2def geninc mkshelllink txt2nls utf16le xml2sdb) if(NOT MSVC) list(APPEND HOST_TOOLS rsym pefixup) endif() diff --git a/sdk/cmake/msvc.cmake b/sdk/cmake/msvc.cmake index e19e8e8b6ff..4e64db9d3aa 100644 --- a/sdk/cmake/msvc.cmake +++ b/sdk/cmake/msvc.cmake @@ -475,21 +475,26 @@ macro(add_asm_files _target) get_includes(_directory_includes) get_directory_property(_defines COMPILE_DEFINITIONS) foreach(_source_file ${ARGN}) - get_filename_component(_source_file_base_name ${_source_file} NAME_WE) - get_filename_component(_source_file_full_path ${_source_file} ABSOLUTE) - set(_preprocessed_asm_file ${CMAKE_CURRENT_BINARY_DIR}/asm/${_source_file_base_name}_${_target}.asm) - get_source_file_property(_defines_semicolon_list ${_source_file_full_path} COMPILE_DEFINITIONS) - unset(_source_file_defines) - foreach(_define ${_defines_semicolon_list}) - if(NOT ${_define} STREQUAL "NOTFOUND") - list(APPEND _source_file_defines -D${_define}) - endif() - endforeach() - add_custom_command( - OUTPUT ${_preprocessed_asm_file} - COMMAND cl /nologo /X /I${REACTOS_SOURCE_DIR}/sdk/include/asm /I${REACTOS_BINARY_DIR}/sdk/include/asm ${_directory_includes} ${_source_file_defines} ${_directory_defines} /D__ASM__ /D_USE_ML /EP /c ${_source_file_full_path} > ${_preprocessed_asm_file} - DEPENDS ${_source_file_full_path}) - list(APPEND ${_target} ${_preprocessed_asm_file}) + get_filename_component(_extension ${_source_file} EXT) + if (("${_extension}" STREQUAL ".asm") OR ("${_extension}" STREQUAL ".inc")) + list(APPEND ${_target} ${_source_file}) + else() + get_filename_component(_source_file_base_name ${_source_file} NAME_WE) + get_filename_component(_source_file_full_path ${_source_file} ABSOLUTE) + set(_preprocessed_asm_file ${CMAKE_CURRENT_BINARY_DIR}/asm/${_source_file_base_name}_${_target}.asm) + get_source_file_property(_defines_semicolon_list ${_source_file_full_path} COMPILE_DEFINITIONS) + unset(_source_file_defines) + foreach(_define ${_defines_semicolon_list}) + if(NOT ${_define} STREQUAL "NOTFOUND") + list(APPEND _source_file_defines -D${_define}) + endif() + endforeach() + add_custom_command( + OUTPUT ${_preprocessed_asm_file} + COMMAND cl /nologo /X /I${REACTOS_SOURCE_DIR}/sdk/include/asm /I${REACTOS_BINARY_DIR}/sdk/include/asm ${_directory_includes} ${_source_file_defines} ${_directory_defines} /D__ASM__ /D_USE_ML /EP /c ${_source_file_full_path} > ${_preprocessed_asm_file} + DEPENDS ${_source_file_full_path}) + list(APPEND ${_target} ${_preprocessed_asm_file}) + endif() endforeach() endmacro() diff --git a/sdk/tools/CMakeLists.txt b/sdk/tools/CMakeLists.txt index 535712d171d..1df7a8adbcf 100644 --- a/sdk/tools/CMakeLists.txt +++ b/sdk/tools/CMakeLists.txt @@ -27,6 +27,7 @@ target_link_libraries(obj2bin PRIVATE host_includes) add_host_tool(spec2def spec2def/spec2def.c) add_host_tool(utf16le utf16le/utf16le.cpp) +add_subdirectory(asmpp) add_subdirectory(cabman) add_subdirectory(fatten) add_subdirectory(hhpcomp) diff --git a/sdk/tools/asmpp/CMakeLists.txt b/sdk/tools/asmpp/CMakeLists.txt new file mode 100644 index 00000000000..d65a7c4ca28 --- /dev/null +++ b/sdk/tools/asmpp/CMakeLists.txt @@ -0,0 +1,8 @@ + +list(APPEND SOURCE + asmpp.cpp +) + +add_host_tool(asmpp ${SOURCE}) +target_link_libraries(asmpp PRIVATE host_includes) +set_property(TARGET asmpp PROPERTY CXX_STANDARD 11) diff --git a/sdk/tools/asmpp/asmpp.cpp b/sdk/tools/asmpp/asmpp.cpp new file mode 100644 index 00000000000..39f7bdbfc70 --- /dev/null +++ b/sdk/tools/asmpp/asmpp.cpp @@ -0,0 +1,1208 @@ +/* + * PROJECT: ReactOS host tools + * LICENSE: MIT (https://spdx.org/licenses/MIT) + * PURPOSE: ASM preprocessor + * COPYRIGHT: Copyright 2021 Timo Kreuzer + */ + +// Optimize even on debug builds, because otherwise it's ridiculously slow +#ifdef _MSC_VER +#pragma optimize("gst", on) +#pragma auto_inline(on) +#else +#pragma GCC optimize("O3,inline") +#endif + +#include "tokenizer.hpp" +#include +#include +#include +#include + +#define PROFILING_ENABLED 0 + +using namespace std; + +time_t search_time; + +enum TOKEN_TYPE +{ + Invalid = -1, + Eof, + WhiteSpace, + NewLine, + Comment, + DecNumber, + HexNumber, + String, + + BraceOpen, + BraceClose, + MemRefStart, + MemRefEnd, + Colon, + Operator, + StringDef, + + KW_include, + KW_const, + KW_code, + KW_endprolog, + KW_ALIGN, + KW_EXTERN, + KW_PUBLIC, + KW_ENDM, + KW_END, + KW_if, + KW_ifdef, + KW_ifndef, + KW_else, + KW_endif, + + KW_allocstack, + KW_savereg, + KW_savexmm128, + + KW_DB, + KW_DW, + KW_DD, + KW_DQ, + KW_EQU, + KW_TEXTEQU, + KW_MACRO, + KW_PROC, + KW_FRAME, + KW_ENDP, + KW_RECORD, + + KW_MASK, + KW_ERRDEF, + + Filename, + Instruction, + Reg8, + Reg16, + Reg32, + Reg64, + RegXmm, + BYTE_PTR, + WORD_PTR, + DWORD_PTR, + QWORD_PTR, + XMMWORD_PTR, + + LabelName, + Identifier +}; + +int fake_printf(const char* format, ...) +{ + return 0; +} + +//#define printf fake_printf + +// Use a look-ahead for following characters, not included into the match +//#define FOLLOWED_BY(x) R"((?=)" x R"())" +#define FOLLOWED_BY(x) x + +#define ANY_CHAR R"((?:.|\n))" +#define WHITESPACE R"((?:[ \t]++))" +#define NEWLINE R"([\n])" +#define WS_OR_NL R"((?:)" WHITESPACE "|" NEWLINE R"()+)" +#define SEPARATOR R"([\s,\=\+\-\*\/\:\~\[\]])" + +#define INSTRUCTION \ + "AAA|AAD|AAM|AAS|ADC|ADCX|ADD|ADDPD|ADDPS|ADDSD|ADDSS|ADDSUBPD|ADDSUBPS|" \ + "ADOX|AESDEC|AESDECLAST|AESENC|AESENCLAST|AESIMC|AESKEYGENASSIST|AND|ANDN|" \ + "ANDNPD|ANDNPS|ANDPD|ANDPS|ARPL|BEXTR|BLENDPD|BLENDPS|BLENDVPD|BLENDVPS|" \ + "BLSI|BLSMSK|BLSR|BNDCL|BNDCN|BNDCU|BNDLDX|BNDMK|BNDMOV|BNDSTX|BOUND|BSF|" \ + "BSR|BSWAP|BT|BTC|BTR|BTS|BZHI|CALL|CBW|CDQ|CDQE|CLAC|CLC|CLD|CLDEMOTE|" \ + "CLFLUSH|CLFLUSHOPT|CLI|CLTS|CLWB|CMC|CMOVcc|CMP|CMPPD|CMPPS|CMPS|CMPSB|" \ + "CMPSD|CMPSQ|CMPSS|CMPSW|CMPXCHG|CMPXCHG16B|CMPXCHG8B|COMISD|COMISS|CPUID|" \ + "CQO|CRC32|CVTDQ2PD|CVTDQ2PS|CVTPD2DQ|CVTPD2PI|CVTPD2PS|CVTPI2PD|CVTPI2PS|" \ + "CVTPS2DQ|CVTPS2PD|CVTPS2PI|CVTSD2SI|CVTSD2SS|CVTSI2SD|CVTSI2SS|CVTSS2SD|" \ + "CVTSS2SI|CVTTPD2DQ|CVTTPD2PI|CVTTPS2DQ|CVTTPS2PI|CVTTSD2SI|CVTTSS2SI|CWD|" \ + "CWDE|DAA|DAS|DEC|DIV|DIVPD|DIVPS|DIVSD|DIVSS|DPPD|DPPS|EMMS|ENTER|" \ + "EXTRACTPS|F2XM1|FABS|FADD|FADDP|FBLD|FBSTP|FCHS|FCLEX|FCMOVcc|FCOM|FCOMI|" \ + "FCOMIP|FCOMP|FCOMPP|FCOS|FDECSTP|FDIV|FDIVP|FDIVR|FDIVRP|FFREE|FIADD|" \ + "FICOM|FICOMP|FIDIV|FIDIVR|FILD|FIMUL|FINCSTP|FINIT|FIST|FISTP|FISTTP|" \ + "FISUB|FISUBR|FLD|FLD1|FLDCW|FLDENV|FLDL2E|FLDL2T|FLDLG2|FLDLN2|FLDPI|" \ + "FLDZ|FMUL|FMULP|FNCLEX|FNINIT|FNOP|FNSAVE|FNSTCW|FNSTENV|FNSTSW|FPATAN|" \ + "FPREM|FPREM1|FPTAN|FRNDINT|FRSTOR|FSAVE|FSCALE|FSIN|FSINCOS|FSQRT|FST|" \ + "FSTCW|FSTENV|FSTP|FSTSW|FSUB|FSUBP|FSUBR|FSUBRP|FTST|FUCOM|FUCOMI|" \ + "FUCOMIP|FUCOMP|FUCOMPP|FWAIT|FXAM|FXCH|FXRSTOR|FXSAVE|FXTRACT|FYL2X|" \ + "FYL2XP1|GF2P8AFFINEINVQB|GF2P8AFFINEQB|GF2P8MULB|HADDPD|HADDPS|HLT|" \ + "HSUBPD|HSUBPS|IDIV|IMUL|IN|INC|INS|INSB|INSD|INSERTPS|INSW|INT|INT1|INT3|" \ + "INTO|INVD|INVLPG|INVPCID|IRET|IRETD|JMP|Jcc|KADDB|KADDD|KADDQ|KADDW|" \ + "KANDB|KANDD|KANDNB|KANDND|KANDNQ|KANDNW|KANDQ|KANDW|KMOVB|KMOVD|KMOVQ|" \ + "KMOVW|KNOTB|KNOTD|KNOTQ|KNOTW|KORB|KORD|KORQ|KORTESTB|KORTESTD|KORTESTQ|" \ + "KORTESTW|KORW|KSHIFTLB|KSHIFTLD|KSHIFTLQ|KSHIFTLW|KSHIFTRB|KSHIFTRD|" \ + "KSHIFTRQ|KSHIFTRW|KTESTB|KTESTD|KTESTQ|KTESTW|KUNPCKBW|KUNPCKDQ|KUNPCKWD|" \ + "KXNORB|KXNORD|KXNORQ|KXNORW|KXORB|KXORD|KXORQ|KXORW|LAHF|LAR|LDDQU|" \ + "LDMXCSR|LDS|LEA|LEAVE|LES|LFENCE|LFS|LGDT|LGS|LIDT|LLDT|LMSW|LOCK|LODS|" \ + "LODSB|LODSD|LODSQ|LODSW|LOOP|LOOPcc|LSL|LSS|LTR|LZCNT|MASKMOVDQU|MASKMOVQ|" \ + "MAXPD|MAXPS|MAXSD|MAXSS|MFENCE|MINPD|MINPS|MINSD|MINSS|MONITOR|MOV|MOVAPD|" \ + "MOVAPS|MOVBE|MOVD|MOVDDUP|MOVDIR64B|MOVDIRI|MOVDQ2Q|MOVDQA|MOVDQU|MOVHLPS|" \ + "MOVHPD|MOVHPS|MOVLHPS|MOVLPD|MOVLPS|MOVMSKPD|MOVMSKPS|MOVNTDQ|MOVNTDQA|" \ + "MOVNTI|MOVNTPD|MOVNTPS|MOVNTQ|MOVQ|MOVQ2DQ|MOVS|MOVSB|MOVSD|MOVSHDUP|" \ + "MOVSLDUP|MOVSQ|MOVSS|MOVSW|MOVSX|MOVSXD|MOVUPD|MOVUPS|MOVZX|MPSADBW|MUL|" \ + "MULPD|MULPS|MULSD|MULSS|MULX|MWAIT|NEG|NOP|NOT|OR|ORPD|ORPS|OUT|OUTS|" \ + "OUTSB|OUTSD|OUTSW|PABSB|PABSD|PABSQ|PABSW|PACKSSDW|PACKSSWB|PACKUSDW|" \ + "PACKUSWB|PADDB|PADDD|PADDQ|PADDSB|PADDSW|PADDUSB|PADDUSW|PADDW|PALIGNR|" \ + "PAND|PANDN|PAUSE|PAVGB|PAVGW|PBLENDVB|PBLENDW|PCLMULQDQ|PCMPEQB|PCMPEQD|" \ + "PCMPEQQ|PCMPEQW|PCMPESTRI|PCMPESTRM|PCMPGTB|PCMPGTD|PCMPGTQ|PCMPGTW|" \ + "PCMPISTRI|PCMPISTRM|PDEP|PEXT|PEXTRB|PEXTRD|PEXTRQ|PEXTRW|PHADDD|PHADDSW|" \ + "PHADDW|PHMINPOSUW|PHSUBD|PHSUBSW|PHSUBW|PINSRB|PINSRD|PINSRQ|PINSRW|" \ + "PMADDUBSW|PMADDWD|PMAXSB|PMAXSD|PMAXSQ|PMAXSW|PMAXUB|PMAXUD|PMAXUQ|PMAXUW|" \ + "PMINSB|PMINSD|PMINSQ|PMINSW|PMINUB|PMINUD|PMINUQ|PMINUW|PMOVMSKB|PMOVSX|" \ + "PMOVZX|PMULDQ|PMULHRSW|PMULHUW|PMULHW|PMULLD|PMULLQ|PMULLW|PMULUDQ|POP|" \ + "POPA|POPAD|POPCNT|POPF|POPFD|POPFQ|POR|PREFETCHW|PREFETCHh|PSADBW|PSHUFB|" \ + "PSHUFD|PSHUFHW|PSHUFLW|PSHUFW|PSIGNB|PSIGND|PSIGNW|PSLLD|PSLLDQ|PSLLQ|" \ + "PSLLW|PSRAD|PSRAQ|PSRAW|PSRLD|PSRLDQ|PSRLQ|PSRLW|PSUBB|PSUBD|PSUBQ|PSUBSB|" \ + "PSUBSW|PSUBUSB|PSUBUSW|PSUBW|PTEST|PTWRITE|PUNPCKHBW|PUNPCKHDQ|PUNPCKHQDQ|" \ + "PUNPCKHWD|PUNPCKLBW|PUNPCKLDQ|PUNPCKLQDQ|PUNPCKLWD|PUSH|PUSHA|PUSHAD|" \ + "PUSHF|PUSHFD|PUSHFQ|PXOR|RCL|RCPPS|RCPSS|RCR|RDFSBASE|RDGSBASE|RDMSR|" \ + "RDPID|RDPKRU|RDPMC|RDRAND|RDSEED|RDTSC|RDTSCP|REP|REPE|REPNE|REPNZ|REPZ|" \ + "RET|ROL|ROR|RORX|ROUNDPD|ROUNDPS|ROUNDSD|ROUNDSS|RSM|RSQRTPS|RSQRTSS|SAHF|" \ + "SAL|SAR|SARX|SBB|SCAS|SCASB|SCASD|SCASW|SETcc|SFENCE|SGDT|SHA1MSG1|" \ + "SHA1MSG2|SHA1NEXTE|SHA1RNDS4|SHA256MSG1|SHA256MSG2|SHA256RNDS2|SHL|SHLD|" \ + "SHLX|SHR|SHRD|SHRX|SHUFPD|SHUFPS|SIDT|SLDT|SMSW|SQRTPD|SQRTPS|SQRTSD|" \ + "SQRTSS|STAC|STC|STD|STI|STMXCSR|STOS|STOSB|STOSD|STOSQ|STOSW|STR|SUB|" \ + "SUBPD|SUBPS|SUBSD|SUBSS|SWAPGS|SYSCALL|SYSENTER|SYSEXIT|SYSRET|TEST|" \ + "TPAUSE|TZCNT|UCOMISD|UCOMISS|UD|UMONITOR|UMWAIT|UNPCKHPD|UNPCKHPS|" \ + "UNPCKLPD|UNPCKLPS|VALIGND|VALIGNQ|VBLENDMPD|VBLENDMPS|VBROADCAST|" \ + "VCOMPRESSPD|VCOMPRESSPS|VCVTPD2QQ|VCVTPD2UDQ|VCVTPD2UQQ|VCVTPH2PS|" \ + "VCVTPS2PH|VCVTPS2QQ|VCVTPS2UDQ|VCVTPS2UQQ|VCVTQQ2PD|VCVTQQ2PS|VCVTSD2USI|" \ + "VCVTSS2USI|VCVTTPD2QQ|VCVTTPD2UDQ|VCVTTPD2UQQ|VCVTTPS2QQ|VCVTTPS2UDQ|" \ + "VCVTTPS2UQQ|VCVTTSD2USI|VCVTTSS2USI|VCVTUDQ2PD|VCVTUDQ2PS|VCVTUQQ2PD|" \ + "VCVTUQQ2PS|VCVTUSI2SD|VCVTUSI2SS|VDBPSADBW|VERR|VERW|VEXPANDPD|VEXPANDPS|" \ + "VEXTRACTF128|VEXTRACTF32x4|VEXTRACTF32x8|VEXTRACTF64x2|VEXTRACTF64x4|" \ + "VEXTRACTI128|VEXTRACTI32x4|VEXTRACTI32x8|VEXTRACTI64x2|VEXTRACTI64x4|" \ + "VFIXUPIMMPD|VFIXUPIMMPS|VFIXUPIMMSD|VFIXUPIMMSS|VFMADD132PD|VFMADD132PS|" \ + "VFMADD132SD|VFMADD132SS|VFMADD213PD|VFMADD213PS|VFMADD213SD|VFMADD213SS|" \ + "VFMADD231PD|VFMADD231PS|VFMADD231SD|VFMADD231SS|VFMADDSUB132PD|" \ + "VFMADDSUB132PS|VFMADDSUB213PD|VFMADDSUB213PS|VFMADDSUB231PD|" \ + "VFMADDSUB231PS|VFMSUB132PD|VFMSUB132PS|VFMSUB132SD|VFMSUB132SS|" \ + "VFMSUB213PD|VFMSUB213PS|VFMSUB213SD|VFMSUB213SS|VFMSUB231PD|VFMSUB231PS|" \ + "VFMSUB231SD|VFMSUB231SS|VFMSUBADD132PD|VFMSUBADD132PS|VFMSUBADD213PD|" \ + "VFMSUBADD213PS|VFMSUBADD231PD|VFMSUBADD231PS|VFNMADD132PD|VFNMADD132PS|" \ + "VFNMADD132SD|VFNMADD132SS|VFNMADD213PD|VFNMADD213PS|VFNMADD213SD|" \ + "VFNMADD213SS|VFNMADD231PD|VFNMADD231PS|VFNMADD231SD|VFNMADD231SS|" \ + "VFNMSUB132PD|VFNMSUB132PS|VFNMSUB132SD|VFNMSUB132SS|VFNMSUB213PD|" \ + "VFNMSUB213PS|VFNMSUB213SD|VFNMSUB213SS|VFNMSUB231PD|VFNMSUB231PS|" \ + "VFNMSUB231SD|VFNMSUB231SS|VFPCLASSPD|VFPCLASSPS|VFPCLASSSD|VFPCLASSSS|" \ + "VGATHERDPD|VGATHERDPS|VGATHERQPD|VGATHERQPS|VGETEXPPD|VGETEXPPS|VGETEXPSD|" \ + "VGETEXPSS|VGETMANTPD|VGETMANTPS|VGETMANTSD|VGETMANTSS|VINSERTF128|" \ + "VINSERTF32x4|VINSERTF32x8|VINSERTF64x2|VINSERTF64x4|VINSERTI128|" \ + "VINSERTI32x4|VINSERTI32x8|VINSERTI64x2|VINSERTI64x4|VMASKMOV|VMOVDQA32|" \ + "VMOVDQA64|VMOVDQU16|VMOVDQU32|VMOVDQU64|VMOVDQU8|VPBLENDD|VPBLENDMB|" \ + "VPBLENDMD|VPBLENDMQ|VPBLENDMW|VPBROADCAST|VPBROADCASTB|VPBROADCASTD|" \ + "VPBROADCASTM|VPBROADCASTQ|VPBROADCASTW|VPCMPB|VPCMPD|VPCMPQ|VPCMPUB|" \ + "VPCMPUD|VPCMPUQ|VPCMPUW|VPCMPW|VPCOMPRESSD|VPCOMPRESSQ|VPCONFLICTD|" \ + "VPCONFLICTQ|VPERM2F128|VPERM2I128|VPERMB|VPERMD|VPERMI2B|VPERMI2D|" \ + "VPERMI2PD|VPERMI2PS|VPERMI2Q|VPERMI2W|VPERMILPD|VPERMILPS|VPERMPD|VPERMPS|" \ + "VPERMQ|VPERMT2B|VPERMT2D|VPERMT2PD|VPERMT2PS|VPERMT2Q|VPERMT2W|VPERMW|" \ + "VPEXPANDD|VPEXPANDQ|VPGATHERDD|VPGATHERDQ|VPGATHERQD|VPGATHERQQ|VPLZCNTD|" \ + "VPLZCNTQ|VPMADD52HUQ|VPMADD52LUQ|VPMASKMOV|VPMOVB2M|VPMOVD2M|VPMOVDB|" \ + "VPMOVDW|VPMOVM2B|VPMOVM2D|VPMOVM2Q|VPMOVM2W|VPMOVQ2M|VPMOVQB|VPMOVQD|" \ + "VPMOVQW|VPMOVSDB|VPMOVSDW|VPMOVSQB|VPMOVSQD|VPMOVSQW|VPMOVSWB|VPMOVUSDB|" \ + "VPMOVUSDW|VPMOVUSQB|VPMOVUSQD|VPMOVUSQW|VPMOVUSWB|VPMOVW2M|VPMOVWB|" \ + "VPMULTISHIFTQB|VPROLD|VPROLQ|VPROLVD|VPROLVQ|VPRORD|VPRORQ|VPRORVD|" \ + "VPRORVQ|VPSCATTERDD|VPSCATTERDQ|VPSCATTERQD|VPSCATTERQQ|VPSLLVD|VPSLLVQ|" \ + "VPSLLVW|VPSRAVD|VPSRAVQ|VPSRAVW|VPSRLVD|VPSRLVQ|VPSRLVW|VPTERNLOGD|" \ + "VPTERNLOGQ|VPTESTMB|VPTESTMD|VPTESTMQ|VPTESTMW|VPTESTNMB|VPTESTNMD|" \ + "VPTESTNMQ|VPTESTNMW|VRANGEPD|VRANGEPS|VRANGESD|VRANGESS|VRCP14PD|VRCP14PS|" \ + "VRCP14SD|VRCP14SS|VREDUCEPD|VREDUCEPS|VREDUCESD|VREDUCESS|VRNDSCALEPD|" \ + "VRNDSCALEPS|VRNDSCALESD|VRNDSCALESS|VRSQRT14PD|VRSQRT14PS|VRSQRT14SD|" \ + "VRSQRT14SS|VSCALEFPD|VSCALEFPS|VSCALEFSD|VSCALEFSS|VSCATTERDPD|" \ + "VSCATTERDPS|VSCATTERQPD|VSCATTERQPS|VSHUFF32x4|VSHUFF64x2|VSHUFI32x4|" \ + "VSHUFI64x2|VTESTPD|VTESTPS|VZEROALL|VZEROUPPER|WAIT|WBINVD|WRFSBASE|" \ + "WRGSBASE|WRMSR|WRPKRU|XABORT|XACQUIRE|XADD|XBEGIN|XCHG|XEND|XGETBV|XLAT|" \ + "XLATB|XOR|XORPD|XORPS|XRELEASE|XRSTOR|XRSTORS|XSAVE|XSAVEC|XSAVEOPT|" \ + "XSAVES|XSETBV|XTEST" + +vector g_TokenList = +{ + //{ TOKEN_TYPE::WhiteSpace, R"((\s+))" }, + { TOKEN_TYPE::WhiteSpace, R"(([ \t]+))" }, + { TOKEN_TYPE::NewLine, R"((\n))" }, + { TOKEN_TYPE::Comment, R"((;.*\n))" }, + { TOKEN_TYPE::HexNumber, R"(([0-9][0-9a-f]*h))" FOLLOWED_BY(R"([\s\n\+\-\*\/,=!\]\(\)])") }, + { TOKEN_TYPE::DecNumber, R"(([0-9]+))" FOLLOWED_BY(R"([\s\n\+\-\*\/,=!\]\(\)])") }, + { TOKEN_TYPE::String, R"((\".*\"))" }, + + { TOKEN_TYPE::BraceOpen, R"((\())"}, + { TOKEN_TYPE::BraceClose, R"((\)))"}, + { TOKEN_TYPE::MemRefStart, R"((\[))"}, + { TOKEN_TYPE::MemRefEnd, R"((\]))"}, + { TOKEN_TYPE::Colon, R"((\:))"}, + { TOKEN_TYPE::Operator, R"(([,\+\-\*\/\:]))"}, + { TOKEN_TYPE::StringDef, R"((<.+>))" }, + + { TOKEN_TYPE::KW_include, R"((include))" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::KW_const, R"((\.const))" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::KW_code, R"((\.code))" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::KW_endprolog, R"((\.endprolog))" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::KW_ALIGN, R"((ALIGN))" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::KW_EXTERN, R"((EXTERN))" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::KW_EXTERN, R"((EXTRN))" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::KW_PUBLIC, R"((PUBLIC))" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::KW_ENDM, R"((ENDM))" FOLLOWED_BY(R"([\s\;])") }, + { TOKEN_TYPE::KW_END, R"((END))" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::KW_if, R"((if))" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::KW_ifdef, R"((ifdef))" FOLLOWED_BY(R"([\s])")}, + { TOKEN_TYPE::KW_ifndef, R"((ifndef))" FOLLOWED_BY(R"([\s])")}, + { TOKEN_TYPE::KW_else, R"((else))" FOLLOWED_BY(R"([\s])")}, + { TOKEN_TYPE::KW_endif, R"((endif))" FOLLOWED_BY(R"([\s])")}, + + { TOKEN_TYPE::KW_allocstack, R"((.allocstack))" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::KW_savereg, R"((.savereg))" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::KW_savexmm128, R"((.savexmm128))" FOLLOWED_BY(R"([\s])") }, + + { TOKEN_TYPE::KW_DB, R"((DB))" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::KW_DW, R"((DW))" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::KW_DD, R"((DD))" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::KW_DQ, R"((DQ))" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::KW_EQU, R"((EQU))" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::KW_TEXTEQU, R"((TEXTEQU))" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::KW_MACRO, R"((MACRO))" FOLLOWED_BY(R"([\s\;])") }, + { TOKEN_TYPE::KW_PROC, R"((PROC))" FOLLOWED_BY(R"([\s\;])") }, + { TOKEN_TYPE::KW_FRAME, R"((FRAME))" FOLLOWED_BY(R"([\s\;])") }, + { TOKEN_TYPE::KW_ENDP, R"((ENDP))" FOLLOWED_BY(R"([\s\;])") }, + { TOKEN_TYPE::KW_RECORD, R"((RECORD))" FOLLOWED_BY(R"([\s\;])") }, + { TOKEN_TYPE::KW_MASK, R"((MASK))" FOLLOWED_BY(R"([\s\;])")}, + { TOKEN_TYPE::KW_ERRDEF, R"((\.ERRDEF))" FOLLOWED_BY(R"([\s\;])")}, + + { TOKEN_TYPE::Filename, R"(([a-z_][a-z0-9_]*\.inc))" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::Instruction, "(" INSTRUCTION ")" FOLLOWED_BY(R"([\s])") }, + { TOKEN_TYPE::Reg8, R"((al|ah|bl|bh|cl|ch|dl|dh|sil|dil|bpl|spl|r8b|r9b|r10b|r11b|r12b|r13b|r14b|r15b))" FOLLOWED_BY(R"([\s\,])") }, + { TOKEN_TYPE::Reg16, R"((ax|bx|cx|dx|si|di|bp|sp|r8w|r9w|r10w|r11w|r12w|r13w|r14w|r15w))" FOLLOWED_BY(R"([\s\,])") }, + { TOKEN_TYPE::Reg32, R"((eax|ebx|ecx|edx|esi|edi|ebp|esp|r8d|r9d|r10d|r11d|r12d|r13d|r14d|r15d))" FOLLOWED_BY(R"([\s\,])") }, + { TOKEN_TYPE::Reg64, R"((rax|rbx|rcx|rdx|rsi|rdi|rbp|rsp|r8|r9|r10|r11|r12|r13|r14|r15))" FOLLOWED_BY(R"([\s\,])") }, + { TOKEN_TYPE::RegXmm, R"((xmm0|xmm1|xmm2|xmm3|xmm4|xmm5|xmm6|xmm7|xmm8|xmm9|xmm10|xmm11|xmm12|xmm13|xmm14|xmm15))" FOLLOWED_BY(R"([\s\,])") }, + { TOKEN_TYPE::BYTE_PTR, R"((BYTE[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") }, + { TOKEN_TYPE::WORD_PTR, R"((WORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") }, + { TOKEN_TYPE::DWORD_PTR, R"((DWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") }, + { TOKEN_TYPE::QWORD_PTR, R"((QWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") }, + { TOKEN_TYPE::XMMWORD_PTR, R"((XMMWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") }, + + { TOKEN_TYPE::Identifier, R"((@@))" FOLLOWED_BY(SEPARATOR)}, + { TOKEN_TYPE::Identifier, R"((@[a-z_][a-z0-9_]*))" FOLLOWED_BY(SEPARATOR)}, + { TOKEN_TYPE::Identifier, R"(([a-z_][a-z0-9_]*))" FOLLOWED_BY(SEPARATOR)}, + +}; + +// FIXME: use context? +unsigned int g_label_number = 0; + +vector g_identifiers; + +void +add_mem_id(Token& tok) +{ + g_identifiers.push_back(tok.str()); + //fprintf(stderr, "Added mem id: '%s'\n", tok.str().c_str()); +} + +bool +is_mem_id(Token& tok) +{ + for (auto id : g_identifiers) + { + if (id == tok.str()) + { + return true; + } + } + + return false; +} + +bool +iequals(const string &a, const string &b) +{ + size_t sz = a.size(); + if (b.size() != sz) + return false; + for (unsigned int i = 0; i < sz; ++i) + if (tolower(a[i]) != tolower(b[i])) + return false; + return true; +} + +Token +get_expected_token(Token&& tok, TOKEN_TYPE type) +{ + if (tok.type() != type) + { + throw "Not white space after identifier!\n"; + } + + return tok; +} + +Token get_ws(Token&& tok) +{ + int type = tok.type(); + if (type != TOKEN_TYPE::WhiteSpace) + { + throw "Not white space after identifier!\n"; + } + + return tok; +} + +Token get_ws_or_nl(Token&& tok) +{ + int type = tok.type(); + if ((type != TOKEN_TYPE::WhiteSpace) && + (type != TOKEN_TYPE::NewLine)) + { + throw "Not white space after identifier!\n"; + } + + return tok; +} + +bool is_string_in_list(vector list, string str) +{ + for (string &s : list) + { + if (s == str) + { + return true; + } + } + + return false; +} + +size_t +translate_token(TokenList& tokens, size_t index, const vector ¯o_params) +{ + Token tok = tokens[index]; + switch (tok.type()) + { + case TOKEN_TYPE::Comment: + printf("//%s", tok.str().c_str() + 1); + break; + + case TOKEN_TYPE::DecNumber: + { + unsigned long long num = stoull(tok.str(), nullptr, 10); + printf("%llu", num); + break; + } + + case TOKEN_TYPE::HexNumber: + { + string number = tok.str(); + printf("0x%s", number.substr(0, number.size() - 1).c_str()); + break; + } + + case TOKEN_TYPE::Identifier: + if (is_string_in_list(macro_params, tok.str())) + { + printf("\\"); + } + printf("%s", tok.str().c_str()); + break; + + // We migt want to improve these + case TOKEN_TYPE::BYTE_PTR: + case TOKEN_TYPE::WORD_PTR: + case TOKEN_TYPE::DWORD_PTR: + case TOKEN_TYPE::QWORD_PTR: + case TOKEN_TYPE::XMMWORD_PTR: + + // Check these. valid only in instructions? + case TOKEN_TYPE::Reg8: + case TOKEN_TYPE::Reg16: + case TOKEN_TYPE::Reg32: + case TOKEN_TYPE::Reg64: + case TOKEN_TYPE::RegXmm: + case TOKEN_TYPE::Instruction: + + case TOKEN_TYPE::WhiteSpace: + case TOKEN_TYPE::NewLine: + case TOKEN_TYPE::Operator: + printf("%s", tok.str().c_str()); + break; + + default: + printf("%s", tok.str().c_str()); + break; + } + + return index + 1; +} + +size_t complete_line(TokenList &tokens, size_t index, const vector ¯o_params) +{ + while (index < tokens.size()) + { + Token tok = tokens[index]; + index = translate_token(tokens, index, macro_params); + if ((tok.type() == TOKEN_TYPE::NewLine) || + (tok.type() == TOKEN_TYPE::Comment)) + { + break; + } + } + + return index; +} + +size_t +translate_expression(TokenList &tokens, size_t index, const vector ¯o_params) +{ + while (index < tokens.size()) + { + Token tok = tokens[index]; + switch (tok.type()) + { + case TOKEN_TYPE::NewLine: + case TOKEN_TYPE::Comment: + return index; + + case TOKEN_TYPE::KW_MASK: + printf("MASK_"); + index += 2; + break; + + case TOKEN_TYPE::Instruction: + if (iequals(tok.str(), "and")) + { + printf("&"); + index += 1; + } + else if (iequals(tok.str(), "or")) + { + printf("|"); + index += 1; + } + else if (iequals(tok.str(), "shl")) + { + printf("<<"); + index += 1; + } + else if (iequals(tok.str(), "not")) + { + printf("!"); + index += 1; + } + else + { + throw "Invalid expression"; + } + break; + + case TOKEN_TYPE::Operator: + if (tok.str() == ",") + { + return index; + } + case TOKEN_TYPE::WhiteSpace: + case TOKEN_TYPE::BraceOpen: + case TOKEN_TYPE::BraceClose: + case TOKEN_TYPE::DecNumber: + case TOKEN_TYPE::HexNumber: + case TOKEN_TYPE::Identifier: + index = translate_token(tokens, index, macro_params); + break; + + default: + index = translate_token(tokens, index, macro_params); + } + } + + return index; +} + +size_t translate_mem_ref(TokenList& tokens, size_t index, const vector& macro_params) +{ + unsigned int offset = 0; + + Token tok = tokens[index]; + + if ((tok.type() == TOKEN_TYPE::DecNumber) || + (tok.type() == TOKEN_TYPE::HexNumber)) + { + offset = stoi(tok.str(), nullptr, 0); + index += 2; + } + + index = translate_token(tokens, index, macro_params); + + while (index < tokens.size()) + { + Token tok = tokens[index]; + index = translate_token(tokens, index, macro_params); + if (tok.type() == TOKEN_TYPE::MemRefEnd) + { + if (offset != 0) + { + printf(" + %u", offset); + } + return index; + } + } + + throw "Failed to translate memory ref"; + return index; +} + +size_t translate_instruction_param(TokenList& tokens, size_t index, const vector& macro_params) +{ + switch (tokens[index].type()) + { + case TOKEN_TYPE::BYTE_PTR: + case TOKEN_TYPE::WORD_PTR: + case TOKEN_TYPE::DWORD_PTR: + case TOKEN_TYPE::QWORD_PTR: + case TOKEN_TYPE::XMMWORD_PTR: + index = translate_token(tokens, index, macro_params); + + // Optional whitespace + if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) + { + index = translate_token(tokens, index, macro_params); + } + } + + while (index < tokens.size()) + { + Token tok = tokens[index]; + switch (tok.type()) + { + case TOKEN_TYPE::MemRefStart: + return translate_mem_ref(tokens, index, macro_params); + + case TOKEN_TYPE::NewLine: + case TOKEN_TYPE::Comment: + return index; + + case TOKEN_TYPE::Operator: + if (tok.str() == ",") + return index; + + case TOKEN_TYPE::Identifier: + index = translate_token(tokens, index, macro_params); + if (is_mem_id(tok)) + { + printf("[rip]"); + } + break; + + default: + index = translate_expression(tokens, index, macro_params); + } + } + + return index; +} + +size_t translate_instruction(TokenList& tokens, size_t index, const vector& macro_params) +{ + // Translate the instruction itself + index = translate_token(tokens, index, macro_params); + + // Handle instruction parameters + while (index < tokens.size()) + { + // Optional whitespace + if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) + { + index = translate_token(tokens, index, macro_params); + } + + // Check for parameters + Token tok = tokens[index]; + switch (tok.type()) + { + case TOKEN_TYPE::Comment: + case TOKEN_TYPE::NewLine: + return index; + + case TOKEN_TYPE::WhiteSpace: + case TOKEN_TYPE::Operator: + index = translate_token(tokens, index, macro_params); + break; + + default: + index = translate_instruction_param(tokens, index, macro_params); + break; + } + } + + return index; +} + +size_t translate_item(TokenList& tokens, size_t index, const vector ¯o_params) +{ + switch (tokens[index].type()) + { + case TOKEN_TYPE::DecNumber: + case TOKEN_TYPE::HexNumber: + case TOKEN_TYPE::String: + case TOKEN_TYPE::WhiteSpace: + return translate_token(tokens, index, macro_params); + } + + throw "Failed to translate item"; + return -1; +} + +size_t translate_list(TokenList& tokens, size_t index, const vector ¯o_params) +{ + while (index < tokens.size()) + { + // The item itself + index = translate_item(tokens, index, macro_params); + + // Optional white space + if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) + { + index = translate_token(tokens, index, macro_params); + } + + // End of list? + if ((tokens[index].type() == TOKEN_TYPE::Comment) || + (tokens[index].type() == TOKEN_TYPE::NewLine)) + { + return index; + } + + // We expect a comma here + if ((tokens[index].type() != TOKEN_TYPE::Operator) || + (tokens[index].str() != ",")) + { + throw "Unexpected end of list"; + } + + index = translate_token(tokens, index, macro_params); + if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) + { + index = translate_token(tokens, index, macro_params); + } + } + + throw "Failed to translate list"; + return -1; +} + +size_t +translate_data_def(TokenList& tokens, size_t index, const vector& macro_params) +{ + Token tok = tokens[index]; + Token tok1 = get_ws(tokens[index + 1]); + string directive, need, have =""; + + switch (tok.type()) + { + case TOKEN_TYPE::KW_DB: + directive = ".byte"; + break; + + case TOKEN_TYPE::KW_DW: + directive = ".short"; + break; + + case TOKEN_TYPE::KW_DD: + directive = ".long"; + break; + + case TOKEN_TYPE::KW_DQ: + directive = ".quad"; + break; + } + + index += 2; + + while (index < tokens.size()) + { + // Check if we need '.ascii' for ASCII strings + if (tokens[index].str()[0] == '\"') + { + need = ".ascii"; + } + else + { + need = directive; + } + + // Output the directive we need (or a comma) + if (have == "") + { + printf("%s ", need.c_str()); + } + else if (have != need) + { + printf("\n%s ", need.c_str()); + } + else + { + printf(", "); + } + + have = need; + + // The item itself + index = translate_item(tokens, index, macro_params); + + // Optional white space + if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) + { + index = translate_token(tokens, index, macro_params); + } + + // End of list? + if ((tokens[index].type() == TOKEN_TYPE::Comment) || + (tokens[index].type() == TOKEN_TYPE::NewLine)) + { + return index; + } + + // We expect a comma here + if ((tokens[index].type() != TOKEN_TYPE::Operator) || + (tokens[index].str() != ",")) + { + throw "Unexpected end of list"; + } + + // Skip comma and optional white-space + index++; + if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) + { + index++; + } + } + + throw "Failed to translate list"; + return -1; +} + +size_t +translate_construct_one_param(string translated, TokenList& tokens, size_t index, const vector& macro_params) +{ + // The next token should be white space + Token tok1 = get_ws(tokens[index + 1]); + + printf("%s%s", translated.c_str(), tok1.str().c_str()); + return translate_expression(tokens, index + 2, macro_params); +} + +size_t +translate_record(TokenList &tokens, size_t index, const vector ¯o_params) +{ + unsigned int bits, bitpos = 0; + unsigned long long oldmask = 0, mask = 0; + + Token tok_name = get_expected_token(tokens[index], TOKEN_TYPE::Identifier); + index += 4; + while (index < tokens.size()) + { + Token tok_member = get_expected_token(tokens[index++], TOKEN_TYPE::Identifier); + + if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) + { + index++; + } + + if (tokens[index++].str() != ":") + { + throw "Unexpected token"; + } + + if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) + { + index++; + } + + Token tok_bits = tokens[index++]; + if ((tok_bits.type() != TOKEN_TYPE::DecNumber) && + (tok_bits.type() != TOKEN_TYPE::HexNumber)) + { + throw "Unexpected token"; + } + + bits = stoi(tok_bits.str(), nullptr, 0); + + printf("%s = %u\n", tok_member.str().c_str(), bitpos); + + oldmask = (1ULL << bitpos) - 1; + bitpos += bits; + mask = (1ULL << bitpos) - 1 - oldmask; + printf("MASK_%s = 0x%llx\n", tok_member.str().c_str(), mask); + + if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) + { + index++; + } + + if ((tokens[index].type() == TOKEN_TYPE::NewLine) || + (tokens[index].type() == TOKEN_TYPE::Comment)) + { + break; + } + + if (tokens[index].str() != ",") + { + throw "unexpected token"; + } + + index++; + if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) + { + index++; + } + + if ((tokens[index].type() == TOKEN_TYPE::NewLine) || + (tokens[index].type() == TOKEN_TYPE::Comment)) + { + index++; + } + + if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) + { + index++; + } + } + + return index; +} + +size_t +translate_identifier_construct(TokenList& tokens, size_t index, const vector ¯o_params) +{ + Token tok = tokens[index]; + Token tok1 = tokens[index + 1]; + + if (tok1.type() == TOKEN_TYPE::Colon) + { + if (tok.str() == "@@") + { + g_label_number++; + printf("%u:", g_label_number); + } + else + { + printf("%s:", tok.str().c_str()); + } + return index + 2; + } + + Token tok2 = tokens[index + 2]; + + switch (tok2.type()) + { + case TOKEN_TYPE::KW_MACRO: + throw "Cannot have a nested macro!"; + + case TOKEN_TYPE::KW_DB: + case TOKEN_TYPE::KW_DW: + case TOKEN_TYPE::KW_DD: + case TOKEN_TYPE::KW_DQ: + printf("%s:%s", tok.str().c_str(), tok1.str().c_str()); + add_mem_id(tok); + return translate_data_def(tokens, index + 2, macro_params); + + case TOKEN_TYPE::KW_EQU: + //printf("%s%s", tok.str().c_str(), tok1.str().c_str()); + printf("#define %s ", tok.str().c_str()); + return translate_expression(tokens, index + 3, macro_params); + + case TOKEN_TYPE::KW_TEXTEQU: + { + Token tok3 = get_ws(tokens[index + 3]); + Token tok4 = get_expected_token(tokens[index + 4], TOKEN_TYPE::StringDef); + + string textdef = tok4.str(); + printf("#define %s %s", tok.str().c_str(), textdef.substr(1, textdef.size() - 2).c_str()); + return index + 5; + } + + case TOKEN_TYPE::KW_PROC: + { + printf(".func %s\n", tok.str().c_str()); + printf("%s:", tok.str().c_str()); + index += 3; + + if ((tokens[index].type() == TOKEN_TYPE::WhiteSpace) && + (tokens[index + 1].type() == TOKEN_TYPE::KW_FRAME)) + { +#ifdef TARGET_amd64 + printf("\n.seh_proc %s\n", tok.str().c_str()); +#else + printf("\n.cfi_startproc\n"); +#endif + index += 2; + } + break; + } + + case TOKEN_TYPE::KW_ENDP: + { + printf(".seh_endproc\n.endfunc"); + index += 3; + break; + } + + case TOKEN_TYPE::KW_RECORD: + index = translate_record(tokens, index, macro_params); + break; + + default: + // We don't know what it is, assume it's a macro and treat it like an instruction + index = translate_instruction(tokens, index, macro_params); + break; + } + + return index; +} + +size_t +translate_construct(TokenList& tokens, size_t index, const vector ¯o_params) +{ + Token tok = tokens[index]; + + switch (tok.type()) + { + case TOKEN_TYPE::WhiteSpace: + case TOKEN_TYPE::NewLine: + case TOKEN_TYPE::Comment: + return translate_token(tokens, index, macro_params); + + case TOKEN_TYPE::Identifier: + return translate_identifier_construct(tokens, index, macro_params); + + case TOKEN_TYPE::KW_ALIGN: + index = translate_construct_one_param(".align", tokens, index, macro_params); + break; + + case TOKEN_TYPE::KW_allocstack: + index = translate_construct_one_param(".seh_stackalloc", tokens, index, macro_params); + break; + + case TOKEN_TYPE::KW_code: +#ifdef TARGET_amd64 + printf(".code64"); +#else + printf(".code"); +#endif + printf(" .intel_syntax noprefix"); + index++; + break; + + case TOKEN_TYPE::KW_const: + printf(".section .rdata"); + index++; + break; + + case TOKEN_TYPE::KW_DB: + case TOKEN_TYPE::KW_DW: + case TOKEN_TYPE::KW_DD: + case TOKEN_TYPE::KW_DQ: + return translate_data_def(tokens, index, macro_params); + + case TOKEN_TYPE::KW_END: + printf("// END\n"); + return tokens.size(); + + case TOKEN_TYPE::KW_endprolog: + printf(".seh_endprologue"); + index++; + break; + + case TOKEN_TYPE::KW_EXTERN: + { + Token tok1 = get_ws_or_nl(tokens[index + 1]); + Token tok2 = get_expected_token(tokens[index + 2], TOKEN_TYPE::Identifier); + add_mem_id(tok2); + printf("//"); + return complete_line(tokens, index, macro_params); + } + + case TOKEN_TYPE::KW_if: + case TOKEN_TYPE::KW_ifdef: + case TOKEN_TYPE::KW_ifndef: + case TOKEN_TYPE::KW_else: + case TOKEN_TYPE::KW_endif: + // TODO: handle parameter differences between "if" and ".if" etc. + printf("."); + return complete_line(tokens, index, macro_params); + + case TOKEN_TYPE::KW_include: + { + // The next token should be white space + Token tok1 = get_ws_or_nl(tokens[index + 1]); + Token tok2 = get_expected_token(tokens[index + 2], TOKEN_TYPE::Filename); + printf("#include \"%s.h\"", tok2.str().c_str()); + index += 3; + break; + } + + case TOKEN_TYPE::KW_PUBLIC: + index = translate_construct_one_param(".global", tokens, index, macro_params); + break; + + case TOKEN_TYPE::KW_savereg: + printf(".seh_savereg"); + return complete_line(tokens, index + 1, macro_params); + + case TOKEN_TYPE::KW_savexmm128: + printf(".seh_savexmm"); + return complete_line(tokens, index + 1, macro_params); + + case TOKEN_TYPE::Instruction: + index = translate_instruction(tokens, index, macro_params); + break; + + case TOKEN_TYPE::KW_ERRDEF: + printf("//"); + return complete_line(tokens, index, macro_params); + + default: + throw "failed to translate construct"; + } + + // Skip optional white-space + if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) + { + index++; + } + + // Line should end here! + Token end = tokens[index]; + if ((end.type() != TOKEN_TYPE::Comment) && + (end.type() != TOKEN_TYPE::NewLine)) + { + throw "unexpected tokens"; + } + + return index; +} + +size_t +translate_macro(TokenList& tokens, size_t index) +{ + vector macro_params; + + printf(".macro %s", tokens[index].str().c_str()); + + // Parse marameters + index += 3; + while (index < tokens.size()) + { + Token tok = tokens[index]; + switch (tok.type()) + { + case TOKEN_TYPE::NewLine: + case TOKEN_TYPE::Comment: + index = translate_token(tokens, index, macro_params); + break; + + case TOKEN_TYPE::Identifier: + macro_params.push_back(tok.str()); + printf("%s", tok.str().c_str()); + index++; + continue; + + case TOKEN_TYPE::WhiteSpace: + case TOKEN_TYPE::Operator: + index = translate_token(tokens, index, macro_params); + continue; + } + + break; + } + + // Parse content + while (index < tokens.size()) + { + Token tok = tokens[index]; + switch (tok.type()) + { + case TOKEN_TYPE::KW_ENDM: + printf(".endm"); + return index + 1; + + default: + index = translate_construct(tokens, index, macro_params); + } + } + + throw "Failed to translate macro"; + return -1; +} + +void +translate(TokenList &tokens) +{ + size_t index = 0; + size_t size = tokens.size(); + vector empty_macro_params; + + while (index < size) + { + // Macros are special + if ((tokens[index].type() == TOKEN_TYPE::Identifier) && + (tokens[index + 1].type() == TOKEN_TYPE::WhiteSpace) && + (tokens[index + 2].type() == TOKEN_TYPE::KW_MACRO)) + { + index = translate_macro(tokens, index); + } + else + { + index = translate_construct(tokens, index, empty_macro_params); + } + } +} + +int main(int argc, char* argv[]) +{ + if (argc < 2) + { + fprintf(stderr, "Invalid parameter!\n"); + return -1; + } + +#if PROFILING_ENABLED + time_t start_time = time(NULL); +#endif + + try + { + // Open and read the input file + string filename(argv[1]); + ifstream file(filename); + stringstream buffer; + buffer << file.rdbuf(); + string text = buffer.str(); + + // Create the tokenizer + Tokenizer tokenizer(g_TokenList); + + // Get a token list + TokenList toklist(tokenizer, text); + + // Now translate the tokens + translate(toklist); + } + catch (const char* message) + { + fprintf(stderr, "Exception caught: '%s'\n", message); + return -2; + } + +#if PROFILING_ENABLED + time_t total_time = time(NULL) + 1 - start_time; + fprintf(stderr, "total_time = %llu\n", total_time); + fprintf(stderr, "search_time = %llu\n", search_time); + fprintf(stderr, "search: %llu %%\n", search_time * 100 / total_time); +#endif + + return 0; +} diff --git a/sdk/tools/asmpp/asmpp.sln b/sdk/tools/asmpp/asmpp.sln new file mode 100644 index 00000000000..2fbcca4af83 --- /dev/null +++ b/sdk/tools/asmpp/asmpp.sln @@ -0,0 +1,31 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.32510.428 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "asmpp", "asmpp.vcxproj", "{A1F7C9EE-4F70-43CD-A0BE-85D137B80583}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Debug|x64.ActiveCfg = Debug|x64 + {A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Debug|x64.Build.0 = Debug|x64 + {A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Debug|x86.ActiveCfg = Debug|Win32 + {A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Debug|x86.Build.0 = Debug|Win32 + {A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Release|x64.ActiveCfg = Release|x64 + {A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Release|x64.Build.0 = Release|x64 + {A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Release|x86.ActiveCfg = Release|Win32 + {A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Release|x86.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {16936D9E-6E98-4126-8918-03218BC19061} + EndGlobalSection +EndGlobal diff --git a/sdk/tools/asmpp/asmpp.vcxproj b/sdk/tools/asmpp/asmpp.vcxproj new file mode 100644 index 00000000000..6b193af626d --- /dev/null +++ b/sdk/tools/asmpp/asmpp.vcxproj @@ -0,0 +1,151 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 16.0 + Win32Proj + {a1f7c9ee-4f70-43cd-a0be-85d137b80583} + asmpp2 + 10.0 + + + + Application + true + v142 + Unicode + + + Application + false + v142 + true + Unicode + + + Application + true + v142 + Unicode + + + Application + false + v142 + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + false + + + true + + + false + + + + Level3 + true + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + + + + + Level3 + true + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + true + true + + + + + Level3 + true + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp17 + + + Console + true + + + + + Level3 + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + true + true + + + + + + + + + + + + \ No newline at end of file diff --git a/sdk/tools/asmpp/tokenizer.hpp b/sdk/tools/asmpp/tokenizer.hpp new file mode 100644 index 00000000000..d25d5bd9093 --- /dev/null +++ b/sdk/tools/asmpp/tokenizer.hpp @@ -0,0 +1,249 @@ +/* + * PROJECT: ReactOS host tools + * LICENSE: MIT (https://spdx.org/licenses/MIT) + * PURPOSE: Tokenizer class implementation + * COPYRIGHT: Copyright 2021 Timo Kreuzer + */ + +#include +#include +#include +#include +#include + +// Uncomment this for easier debugging +#if 0 +#define throw __debugbreak(); throw +#endif + +extern time_t search_time; + +struct TOKEN_DEF +{ + int Type; + std::string RegExString; +}; + +class Token +{ + const std::string& m_text; + unsigned int m_pos; + unsigned int m_len; +#if _DEBUG + std::string m_dbgstr; +#endif + int m_type; + +public: + + Token(const std::string& text, size_t pos, size_t len, int type) + : m_text(text), + m_pos(static_cast(pos)), + m_len(static_cast(len)), + m_type(type) + { +#if _DEBUG + m_dbgstr = str(); +#endif + } + + std::string str() const + { + return m_text.substr(m_pos, m_len); + } + + int type() const + { + return m_type; + } +}; + +struct Tokenizer +{ + const std::vector &m_tokendefs; + const std::regex m_re; + + typedef int myint; + + static + unsigned int + count_captures(const std::string& exp) + { + bool in_char_group = false; + unsigned int count = 0; + + for (size_t i = 0; i < exp.size(); i++) + { + char c = exp[i]; + + // Skip escaped characters + if (c == '\\') + { + i++; + continue; + } + + if (in_char_group) + { + if (c == ']') + { + in_char_group = false; + } + continue; + } + + if (c == '[') + { + in_char_group = true; + continue; + } + + if (c == '(') + { + if (exp[i + 1] != '?') + { + count++; + } + } + } + + return count; + } + + static + std::regex + CompileMultiRegex(const std::vector &tokendefs) + { + std::string combinedString; + + if (tokendefs.size() == 0) + { + return std::regex(); + } + + // Validate all token definitions + for (auto def : tokendefs) + { + size_t found = -1; + + // Count capture groups + unsigned int count = count_captures(def.RegExString); + if (count != 1) + { + throw "invalid count!\n"; + } + } + + // Combine all expressions into one (one capture group for each) + combinedString = "(?:" + tokendefs[0].RegExString + ")"; + for (size_t i = 1; i < tokendefs.size(); i++) + { + combinedString += "|(?:" + tokendefs[i].RegExString + ")"; + } + + return std::regex(combinedString, std::regex_constants::icase); + } + +public: + + struct TOKEN_REF + { + unsigned int pos; + unsigned int len; + int type; + }; + + Tokenizer(std::vector &tokendefs) + : m_tokendefs(tokendefs), + m_re(CompileMultiRegex(tokendefs)) + { + } + + TOKEN_REF match(std::smatch &matches, const std::string& str) const + { + return match(matches, str, 0); + } + + TOKEN_REF match(std::smatch &matches, const std::string &str, size_t startpos) const + { + const std::string::const_iterator first = str.cbegin() + startpos; + const std::string::const_iterator last = str.cend(); + + // If we reached the end, there is nothing more to do + if (first == last) + { + return TOKEN_REF{ static_cast(startpos), 0, -1 }; + } + + time_t start_time = time(NULL); + + // Try to find a match + if (!std::regex_search(first, last, matches, m_re)) + { + throw "Failed to match\n"; + } + + search_time += time(NULL) - start_time; + + // Validate that it's at the start of the string + if (matches.prefix().matched) + { + throw "Failed to match at current position!\n"; + } + + // We have a match, check which one it is + for (size_t i = 1; i < matches.size(); i++) + { + if (matches[i].matched) + { + unsigned int len = static_cast(matches.length(i)); + int type = m_tokendefs[i - 1].Type; + return TOKEN_REF{ static_cast(startpos), len, type}; + } + } + + // We should never get here + throw "Something went wrong!\n"; + } +}; + + +class TokenList +{ + using TOKEN_REF = typename Tokenizer::TOKEN_REF; + + const Tokenizer& m_tokenizer; + const std::string& m_text; + std::vector m_tokens; + +public: + + TokenList(const Tokenizer& tokenizer, const std::string& text) + : m_tokenizer(tokenizer), + m_text(text) + { + size_t startpos = 0; + size_t len = m_text.size(); + std::smatch matches; + + m_tokens.reserve(len / 5); + + while (startpos < len) + { + TOKEN_REF tref = m_tokenizer.match(matches, m_text, startpos); + m_tokens.push_back(tref); + startpos += tref.len; + }; + } + + size_t size() const + { + return m_tokens.size(); + } + + Token operator[](size_t n) const + { + return Token(m_text, m_tokens[n].pos, m_tokens[n].len, m_tokens[n].type); + } + +};