From 734f480e5f6e8ab2d41f18998799a1eb2cecb712 Mon Sep 17 00:00:00 2001 From: Alexander Nutz Date: Fri, 31 Oct 2025 20:16:59 +0100 Subject: [PATCH 1/8] Full ETC.a support, except for MO1 and MO2 --- .github/labeler.yml | 5 + CMakeLists.txt | 24 +- Makefile | 10 + arch/Etca/EtcaDisassembler.c | 607 +++++++++++++++++++++++++++++++++++ arch/Etca/EtcaDisassembler.h | 20 ++ arch/Etca/EtcaInstPrinter.c | 515 +++++++++++++++++++++++++++++ arch/Etca/EtcaInstPrinter.h | 22 ++ arch/Etca/EtcaModule.c | 41 +++ arch/Etca/EtcaModule.h | 12 + bindings/const_generator.py | 3 +- cs.c | 226 ++++++++----- cstool/cstool.c | 11 + cstool/cstool.h | 1 + cstool/cstool_etca.c | 14 + include/capstone/capstone.h | 4 + include/capstone/etca.h | 231 +++++++++++++ 16 files changed, 1656 insertions(+), 90 deletions(-) create mode 100644 arch/Etca/EtcaDisassembler.c create mode 100644 arch/Etca/EtcaDisassembler.h create mode 100644 arch/Etca/EtcaInstPrinter.c create mode 100644 arch/Etca/EtcaInstPrinter.h create mode 100644 arch/Etca/EtcaModule.c create mode 100644 arch/Etca/EtcaModule.h create mode 100644 cstool/cstool_etca.c create mode 100644 include/capstone/etca.h diff --git a/.github/labeler.yml b/.github/labeler.yml index 38bc906e51..168ebfba62 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -133,6 +133,11 @@ SH: - include/capstone/sh.h - tests/details/sh.yaml +Etca: + - arch/Etca/** + - cstool/cstool_etca.c + - include/capstone/etca.h + Sparc: - arch/Sparc/** - cstool/cstool_sparc.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 718115043b..2aa258cda1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -106,8 +106,8 @@ if(APPLE AND NOT CAPSTONE_BUILD_MACOS_THIN) set(CMAKE_OSX_ARCHITECTURES "x86_64;arm64") endif() -set(SUPPORTED_ARCHITECTURES ARM AARCH64 M68K MIPS PPC SPARC SYSTEMZ XCORE X86 TMS320C64X M680X EVM MOS65XX WASM BPF RISCV SH TRICORE ALPHA HPPA LOONGARCH XTENSA ARC) -set(SUPPORTED_ARCHITECTURE_LABELS ARM AARCH64 M68K MIPS PowerPC Sparc SystemZ XCore x86 
TMS320C64x M680x EVM MOS65XX WASM BPF RISCV SH TriCore Alpha HPPA LoongArch Xtensa ARC) +set(SUPPORTED_ARCHITECTURES ARM AARCH64 M68K MIPS PPC SPARC SYSTEMZ XCORE X86 TMS320C64X M680X EVM MOS65XX WASM BPF RISCV SH Etca TRICORE ALPHA HPPA LOONGARCH XTENSA ARC) +set(SUPPORTED_ARCHITECTURE_LABELS ARM AARCH64 M68K MIPS PowerPC Sparc SystemZ XCore x86 TMS320C64x M680x EVM MOS65XX WASM BPF RISCV SH Etca TriCore Alpha HPPA LoongArch Xtensa ARC) # If building for OSX it's best to allow CMake to handle building both architectures if(APPLE AND NOT CAPSTONE_BUILD_MACOS_THIN) @@ -228,7 +228,7 @@ set(HEADERS_COMMON include/capstone/sh.h include/capstone/tricore.h include/capstone/platform.h - include/capstone/sh.h + include/capstone/etca.h include/capstone/alpha.h include/capstone/hppa.h include/capstone/loongarch.h @@ -633,6 +633,20 @@ if(CAPSTONE_SH_SUPPORT) ) endif() +if(CAPSTONE_Etca_SUPPORT) + add_definitions(-DCAPSTONE_HAS_ETCA) + set(SOURCES_ETCA + arch/Etca/EtcaDisassembler.c + arch/Etca/EtcaInstPrinter.c + arch/Etca/EtcaModule.c + ) + set(HEADERS_ETCA + arch/Etca/EtcaDisassembler.h + arch/Etca/EtcaInstPrinter.h + arch/Etca/EtcaModule.h + ) +endif() + if (CAPSTONE_TRICORE_SUPPORT) add_definitions(-DCAPSTONE_HAS_TRICORE) set(SOURCES_TRICORE @@ -765,6 +779,7 @@ set(ALL_SOURCES ${SOURCES_BPF} ${SOURCES_RISCV} ${SOURCES_SH} + ${SOURCES_ETCA} ${SOURCES_TRICORE} ${SOURCES_ALPHA} ${SOURCES_HPPA} @@ -793,6 +808,7 @@ set(ALL_HEADERS ${HEADERS_BPF} ${HEADERS_RISCV} ${HEADERS_SH} + ${HEADERS_ETCA} ${HEADERS_TRICORE} ${HEADERS_ALPHA} ${HEADERS_HPPA} @@ -868,6 +884,7 @@ source_group("Source\\MOS65XX" FILES ${SOURCES_MOS65XX}) source_group("Source\\BPF" FILES ${SOURCES_BPF}) source_group("Source\\RISCV" FILES ${SOURCES_RISCV}) source_group("Source\\SH" FILES ${SOURCES_SH}) +source_group("Source\\Etca" FILES ${SOURCES_ETCA}) source_group("Source\\TriCore" FILES ${SOURCES_TRICORE}) source_group("Source\\Alpha" FILES ${SOURCES_ALPHA}) source_group("Source\\HPPA" FILES ${SOURCES_HPPA}) 
@@ -894,6 +911,7 @@ source_group("Include\\MOS65XX" FILES ${HEADERS_MOS65XX}) source_group("Include\\BPF" FILES ${HEADERS_BPF}) source_group("Include\\RISCV" FILES ${HEADERS_RISCV}) source_group("Include\\SH" FILES ${HEADERS_SH}) +source_group("Include\\Etca" FILES ${HEADERS_ETCA}) source_group("Include\\TriCore" FILES ${HEADERS_TRICORE}) source_group("Include\\Alpha" FILES ${HEADERS_ALPHA}) source_group("Include\\HPPA" FILES ${HEADERS_HPPA}) diff --git a/Makefile b/Makefile index 59f6800edf..9e0c16cda0 100644 --- a/Makefile +++ b/Makefile @@ -273,6 +273,16 @@ ifneq (,$(findstring evm,$(CAPSTONE_ARCHS))) LIBOBJ_EVM += $(LIBSRC_EVM:%.c=$(OBJDIR)/%.o) endif +DEP_ETCA = +DEP_ETCA += $(wildcard arch/Etca/Etca*.inc) + +LIBOBJ_ETCA = +ifneq (,$(findstring etca,$(CAPSTONE_ARCHS))) + CFLAGS += -DCAPSTONE_HAS_ETCA + LIBSRC_ETCA += $(wildcard arch/Etca/Etca*.c) + LIBOBJ_ETCA += $(LIBSRC_ETCA:%.c=$(OBJDIR)/%.o) +endif + DEP_RISCV = DEP_RISCV += $(wildcard arch/RISCV/RISCV*.inc) diff --git a/arch/Etca/EtcaDisassembler.c b/arch/Etca/EtcaDisassembler.c new file mode 100644 index 0000000000..f20baf7eee --- /dev/null +++ b/arch/Etca/EtcaDisassembler.c @@ -0,0 +1,607 @@ +/* Capstone Disassembly Engine */ +/* By Alexander Nutz, 2025 */ + +#include "capstone/capstone.h" +#include "capstone/etca.h" +#include +#include +#include +#include +#include "../../cs_priv.h" +#include "../../MCInst.h" +#include "../../MCDisassembler.h" +#include "../../utils.h" +#include "EtcaDisassembler.h" +#include "capstone/sh.h" +#include + +/* +static void add_group(cs_detail *detail, cs_etca_insn_group group) +{ + if (detail != NULL && group > ETCA_GRP_INVALID && + group < ETCA_GRP_ENDING) + detail->groups[detail->groups_count++] = group; +} +*/ + +#define sign_extend(var, type, width) \ + if ((var) & (1 << ((width) - 1))) { \ + var = (((type) - 1) << (width)) | var; \ + } + +typedef struct { + struct { + bool present; + } pfx_cond; + + uint8_t cond : 4; + + struct { + bool present; + bool q : 1; + bool 
a : 1; + bool b : 1; + bool x : 1; + } pfx_rex; + + struct { + bool present; + uint8_t a : 3; + uint8_t b : 3; + uint8_t m : 3; + } abm; + + struct { + bool present; + uint8_t r : 3; + uint64_t imm; + } ri; + + struct { + bool present; + uint64_t extended; + } rel; + + union { + struct { + uint8_t sib; + uint64_t extended_disp; + } mo1; + + struct { + uint8_t sib; + uint64_t extended_disp; + uint64_t imm; + } mo2; + } x; + + uint8_t ss : 2; + etca_insn insn; +} DecodeIsntCtx; + +static bool doesSignExtend(etca_insn insn) +{ + switch (insn) { + case ETCA_INS_INVALID: + case ETCA_INS_NOP: + case ETCA_INS_ENDING: + case ETCA_INS_SYSCALL: + case ETCA_INS_ERET: + case ETCA_INS_WAIT: + case ETCA_INS_REL_JMP: + case ETCA_INS_ABS_JMP: + case ETCA_INS_REL_CALL: + case ETCA_INS_ABS_CALL: + case ETCA_INS_LEA: + case ETCA_INS_CACHE_FLUSH_ALL: + case ETCA_INS_DATA_PREFETCH: + case ETCA_INS_INSTRUCTION_PREFETCH: + case ETCA_INS_DCACHE_FLUSH: + case ETCA_INS_ICACHE_INVALIDATE: + case ETCA_INS_CACHE_INVALIDATE_ALL: + case ETCA_INS_DCACHE_INVALIDATE: + case ETCA_INS_ALLOC_ZERO: + return false; /* not applicable */ + + case ETCA_INS_ADD: + case ETCA_INS_SUB: + case ETCA_INS_RSUB: + case ETCA_INS_CMP: + case ETCA_INS_OR: + case ETCA_INS_XOR: + case ETCA_INS_AND: + case ETCA_INS_TEST: + case ETCA_INS_MOVS: + return true; /* sign extend */ + + case ETCA_INS_MOVZ: + case ETCA_INS_LOAD: + case ETCA_INS_STORE: + case ETCA_INS_SLO: + case ETCA_INS_READCR: + case ETCA_INS_WRITECR: + return false; /* zero extend */ + + case ETCA_INS_PUSH: + case ETCA_INS_POP: + case ETCA_INS_ADC: + case ETCA_INS_SBB: + case ETCA_INS_RSBB: + case ETCA_INS_ASR: + case ETCA_INS_ROL: + case ETCA_INS_ROR: + case ETCA_INS_SHL: + case ETCA_INS_SHR: + case ETCA_INS_RCL: + case ETCA_INS_RCR: + case ETCA_INS_POPCNT: + case ETCA_INS_GREV: + case ETCA_INS_CTZ: + case ETCA_INS_CLZ: + case ETCA_INS_NOT: + case ETCA_INS_ANDN: + case ETCA_INS_UDIV: + case ETCA_INS_SDIV: + case ETCA_INS_UREM: + case ETCA_INS_SREM: + case 
ETCA_INS_UMUL: + case ETCA_INS_SMUL: + case ETCA_INS_UHMUL: + case ETCA_INS_SHMUL: + case ETCA_INS_LSB: + case ETCA_INS_LSBMSK: + case ETCA_INS_RLSB: + case ETCA_INS_ZHIB: + return true; /* TODO: this makes no sense */ + } +} + +static void parseABM(DecodeIsntCtx *ctx, uint8_t byte) +{ + ctx->abm.present = true; + ctx->abm.a = byte >> 5; /* bits 7..5 */ + ctx->abm.b = (byte >> 2) & (7 /* 0b111 */); /* bits 4..2; a shift of 3 would overlap A and drop bit 2 */ + ctx->abm.m = byte & 3; /* bits 1..0 */ +} + +static void parseRI(DecodeIsntCtx *ctx, uint8_t byte, etca_insn insn) +{ + ctx->ri.present = true; + ctx->ri.r = byte >> 5; + ctx->ri.imm = byte & 31 /* 0b11111 */; + if (doesSignExtend(insn)) + sign_extend(ctx->ri.imm, uint64_t, 5); +} + +static etca_insn parseExopOpcode(uint16_t opc) +{ + // clang-format off + switch (opc) + { + /* exop */ + case 0: return ETCA_INS_ADC; + case 1: return ETCA_INS_SBB; + case 2: return ETCA_INS_RSBB; + case 3: return ETCA_INS_ASR; + case 4: return ETCA_INS_ROL; + case 5: return ETCA_INS_ROR; + case 6: return ETCA_INS_SHL; + case 7: return ETCA_INS_SHR; + + /* bmi1 */ + case 8: return ETCA_INS_RCL; + case 9: return ETCA_INS_RCR; + case 10: return ETCA_INS_POPCNT; + case 11: return ETCA_INS_GREV; + case 12: return ETCA_INS_CTZ; + case 13: return ETCA_INS_CLZ; + case 14: return ETCA_INS_NOT; + case 15: return ETCA_INS_ANDN; + case 0x18: return ETCA_INS_LSB; + case 0x19: return ETCA_INS_LSBMSK; + case 0x1a: return ETCA_INS_RLSB; + case 0x1b: return ETCA_INS_ZHIB; + + /* md */ + case 0x10: return ETCA_INS_UDIV; + case 0x11: return ETCA_INS_SDIV; + case 0x12: return ETCA_INS_UREM; + case 0x13: return ETCA_INS_SREM; + case 0x14: return ETCA_INS_UMUL; + case 0x15: return ETCA_INS_SMUL; + case 0x16: return ETCA_INS_UHMUL; + case 0x17: return ETCA_INS_SHMUL; + } + // clang-format on + + return ETCA_INS_INVALID; +} + +static etca_insn parseBaseOpcode(uint8_t opc, bool imm) +{ + // clang-format off + switch (opc) + { + case 0: return ETCA_INS_ADD; + case 1: return ETCA_INS_SUB; + case 2: return ETCA_INS_RSUB; + case 3: return
ETCA_INS_CMP; + case 4: return ETCA_INS_OR; + case 5: return ETCA_INS_XOR; + case 6: return ETCA_INS_AND; + case 7: return ETCA_INS_TEST; + case 8: return ETCA_INS_MOVZ; + case 9: return ETCA_INS_MOVS; + case 10: return ETCA_INS_LOAD; + case 11: return ETCA_INS_STORE; + case 12: return imm ? ETCA_INS_SLO : ETCA_INS_INVALID; + case 14: return imm ? ETCA_INS_READCR : ETCA_INS_INVALID; + case 15: return imm ? ETCA_INS_WRITECR : ETCA_INS_INVALID; + } + // clang-format on + + return ETCA_INS_INVALID; +} + +static bool parseCoreOp(DecodeIsntCtx *ctx, const uint8_t **code_p, + size_t *code_len_p, uint16_t *size) +{ + const uint8_t *code = *code_p; + size_t code_len = *code_len_p; + + if (code_len >= 3 && code[0] >> 4 == 2 + 4 + 8 /* 0b1110 */) { + uint16_t opc = (code[0] & 0xF) << 5; + opc |= (code[1] >> 7) << 4; + opc |= code[1] & 0xF; + ctx->insn = parseExopOpcode(opc); + if (ctx->insn == ETCA_INS_INVALID) + return false; + + ctx->ss = (code[1] >> 4) & 3; + + if (code[1] & (1 << 6)) { + parseRI(ctx, code[2], ctx->insn); + } else { + parseABM(ctx, code[2]); + } + + code += 3; + code_len -= 3; + (*size) += 3; + } else if (code_len >= 2 && code[0] == 0x2F && code[1] == 0x11) { + ctx->insn = ETCA_INS_WAIT; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] == 0x0F && code[1] == 0x11) { + ctx->insn = ETCA_INS_SYSCALL; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] == 0x1F && code[1] == 0x11) { + ctx->insn = ETCA_INS_ERET; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] >> 6 == 0 && + (code[0] & 0xF) == 0xF && (code[1] & 0x1F) == 0) { + ctx->insn = ETCA_INS_ALLOC_ZERO; /* matched on the low 5 bits only: a shift pair is a no-op after integer promotion */ + ctx->abm.present = true; + ctx->abm.a = code[1] >> 5; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] >> 6 == 0 && + (code[0] & 0xF) == 0xF && (code[1] & 0x1F) == 4) { + ctx->insn = ETCA_INS_DCACHE_INVALIDATE; /* bits 7..5 of code[1] carry the register */ + ctx->abm.present = true; + ctx->abm.a =
code[1] >> 5; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] == 0x3F && code[1] == 0x11) { + ctx->insn = ETCA_INS_CACHE_INVALIDATE_ALL; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] == 0x8F && code[1] == 0x01) { + ctx->insn = ETCA_INS_CACHE_FLUSH_ALL; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] >> 6 == 0 && + (code[0] & 0xF) == 0xc) { + ctx->ss = (code[0] >> 4) & 3; + ctx->insn = ETCA_INS_POP; + parseABM(ctx, code[1]); + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] >> 6 == 0 && + (code[0] & 0xF) == 0xd) { + ctx->ss = (code[0] >> 4) & 3; + ctx->insn = ETCA_INS_PUSH; + parseABM(ctx, code[1]); + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] >> 6 == 1 && + (code[0] & 0xF) == 0xd) { + ctx->ss = (code[0] >> 4) & 3; + ctx->insn = ETCA_INS_PUSH; + parseRI(ctx, code[1], ctx->insn); + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] == 0x9F && + ((code[1] >> 2) & 3) == 0) { + // clang-format off + switch (code[1] & 3) + { + case 0: ctx->insn = ETCA_INS_DATA_PREFETCH; break; + case 1: ctx->insn = ETCA_INS_INSTRUCTION_PREFETCH; break; + case 2: ctx->insn = ETCA_INS_DCACHE_FLUSH; break; + case 3: ctx->insn = ETCA_INS_DCACHE_INVALIDATE; break; + } + // clang-format on + + ctx->abm.present = true; + ctx->abm.a = code[1] >> 5; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] == 0xaf) { + if ((code[1] >> 4) & 1) + ctx->insn = ETCA_INS_ABS_CALL; + else + ctx->insn = ETCA_INS_ABS_JMP; + + if (ctx->pfx_cond.present) + return false; + + ctx->cond = code[1] & 0xF; + + ctx->abm.present = true; + ctx->abm.a = code[1] >> 5; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] >> 4 == 0xb) { + ctx->insn = ETCA_INS_REL_CALL; + + uint64_t d = (code[0] & 0xF) << 8 | code[1]; + sign_extend(d, 
uint64_t, 12); + ctx->rel.present = true; + ctx->rel.extended = d; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] >> 5 == 4) { + ctx->insn = ETCA_INS_REL_JMP; + + if (ctx->pfx_cond.present) + return false; + + ctx->cond = code[0] & 0xF; + + uint64_t d = ((code[0] >> 4) & 1) << 8 | code[1]; + sign_extend(d, uint64_t, 9); + ctx->rel.present = true; + ctx->rel.extended = d; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len && code[0] >> 4 == 0xF && + code_len >= 1 + (1 << (code[0] & 3))) { + ctx->ss = code[0] & 3; + int sz = 1 << ctx->ss; + + uint64_t d = 0; /* NOTE(review): bytes are folded most-significant first - confirm against the ETCa byte order */ + for (int i = 0; i < sz; i++) { + d <<= 8; + d |= code[i + 1]; + } + ctx->rel.present = true; + ctx->rel.extended = d; + + // clang-format off + switch ((code[0] >> 2) & 3) { + case 0: ctx->insn = ETCA_INS_REL_JMP; break; + case 1: ctx->insn = ETCA_INS_ABS_JMP; break; + case 2: ctx->insn = ETCA_INS_REL_CALL; break; + case 3: ctx->insn = ETCA_INS_ABS_CALL; break; + } + // clang-format on + + if ((ctx->insn == ETCA_INS_REL_JMP || + ctx->insn == ETCA_INS_REL_CALL) && sz < 8 && (d >> (sz * 8 - 1))) + ctx->rel.extended = d | (~(uint64_t)0 << (sz * 8)); /* extend the stored value; an 8-byte displacement is already full width */ + + code += sz + 1; + code_len -= sz + 1; + (*size) += sz + 1; + } else if (code_len >= 2 && code[0] >> 7 == 0 && + code[0] >> 2 != 7 /* 0b111 */) { + bool imm = code[0] & (1 << 6); + + if (imm) { + parseRI(ctx, code[1], parseBaseOpcode(code[0] & 0xF, true)); /* ctx->insn is not decoded yet, so resolve the opcode here for sign extension */ + } else { + parseABM(ctx, code[1]); + } + + /* abm with fi is treated as imm */ + if (!imm && ctx->abm.m == 1 && + (ctx->abm.b == 2 || ctx->abm.b == 3)) { + imm = true; + } + + ctx->insn = parseBaseOpcode(code[0] & 0xF, imm); + if (ctx->insn == ETCA_INS_INVALID) + return false; + + ctx->ss = (code[0] >> 4) & 3; + + code += 2; + code_len -= 2; + (*size) += 2; + } else { + return false; + } + + *code_len_p = code_len; + *code_p = code; + + return true; +} + +// returns true if valid +bool Etca_getInstruction(csh ud, const uint8_t *code, size_t code_len, + MCInst *mcInstr, uint16_t * /* out */ size, + uint64_t address, void *infoIn) +{ +
etca_info *info = infoIn; + // cs_detail *detail = mcInstr->flat_insn->detail; + + DecodeIsntCtx ctx = { 0 }; + ctx.insn = ETCA_INS_INVALID; + ctx.cond = ETCA_COND_ALWAYS; + *size = 0; + + /* conditional prefix */ + if (code_len && code[0] >> 4 == 2 + 8 /* 0b1010 */ && + (code[0] & 0xF) < 14) { + // cond prefix of always / never isn't allowed + ctx.pfx_cond.present = true; + ctx.cond = code[0] & 0xF; + code++; + code_len--; + (*size)++; + } + + /* register expansion prefix */ + if (code_len && code[0] >> 4 == 4 + 8 /* 0b1100 */) { + ctx.pfx_rex.present = true; + ctx.pfx_rex.q = (code[0] >> 3) & 1; + ctx.pfx_rex.a = (code[0] >> 2) & 1; + ctx.pfx_rex.b = (code[0] >> 1) & 1; + ctx.pfx_rex.x = (code[0] >> 0) & 1; + code++; + code_len--; + (*size)++; + } + + // if (code_len && code[0] >> 4 == 1 + 4 + 8 /* 0b1101 */) + // unused prefix + + if (!parseCoreOp(&ctx, &code, &code_len, size)) + return false; + + if (ctx.abm.present && ctx.abm.m == 1 && ctx.abm.b == 2) { + /* 1B full immediate */ + if (!(code_len >= 1)) + return false; + ctx.abm.present = false; + ctx.ri.present = true; + ctx.ri.r = ctx.abm.a; + ctx.ri.imm = code[0]; + + code += 1; + code_len -= 1; + (*size) += 1; + } else if (ctx.abm.present && ctx.abm.m == 1 && ctx.abm.b == 3) { + /* nB full immediate */ + size_t sz = 1 << ctx.ss; + if (sz == 8 && !(ctx.pfx_rex.present && ctx.pfx_rex.q)) + sz = 4; + + if (!(code_len >= sz)) + return false; + ctx.abm.present = false; + ctx.ri.present = true; + ctx.ri.r = ctx.abm.a; + ctx.ri.imm = 0; + for (size_t i = 0; i < sz; i++) { + ctx.ri.imm <<= 8; + ctx.ri.imm |= code[i]; + } + + code += sz; + code_len -= sz; + (*size) += sz; + } + + memset(info, 0, sizeof(*info)); + info->op.cond = ctx.cond; + info->op.insn = ctx.insn; + info->op.ss = ctx.ss; + + if (ctx.rel.present) { + info->op.op_count = 1; + + cs_etca_op *rel = &info->op.operands[0]; + rel->type = ETCA_OP_IMM; + rel->imm = ctx.rel.extended; + } else if (ctx.ri.present) { + info->op.op_count = 2; + + 
info->op.operands[0].type = ETCA_OP_REG; + info->op.operands[0].reg = + ((ctx.pfx_rex.present && ctx.pfx_rex.a) ? + ETCA_REG_FIRST_REX : + ETCA_REG_FIRST_BASE) + + ctx.ri.r; + + info->op.operands[1].type = ETCA_OP_IMM; + info->op.operands[1].imm = ctx.ri.imm; + } else if (ctx.abm.present) { + info->op.op_count = 2; + + info->op.operands[0].type = ETCA_OP_REG; + info->op.operands[0].reg = + ((ctx.pfx_rex.present && ctx.pfx_rex.a) ? + ETCA_REG_FIRST_REX : + ETCA_REG_FIRST_BASE) + + ctx.abm.a; + + info->op.operands[1].type = ETCA_OP_REG; + info->op.operands[1].reg = + ((ctx.pfx_rex.present && ctx.pfx_rex.b) ? /* the B operand is widened by the prefix's own b bit */ + ETCA_REG_FIRST_REX : + ETCA_REG_FIRST_BASE) + + ctx.abm.b; + + // TODO: mo1 & mo2 + if (ctx.abm.m != 0) + return false; + } + + // TODO: add_group + + return true; +} + +#ifndef CAPSTONE_DIET +void Etca_reg_access(const cs_insn *insn, cs_regs regs_read, + uint8_t *regs_read_count, cs_regs regs_write, + uint8_t *regs_write_count) +{ + *regs_read_count = 0; + *regs_write_count = 0; + // TODO +} +#endif diff --git a/arch/Etca/EtcaDisassembler.h b/arch/Etca/EtcaDisassembler.h new file mode 100644 index 0000000000..3a9688471f --- /dev/null +++ b/arch/Etca/EtcaDisassembler.h @@ -0,0 +1,20 @@ +/* Capstone Disassembly Engine */ +/* By Alexander Nutz, 2025 */ + +#ifndef CS_ETCA_DISASSEMBLER_H +#define CS_ETCA_DISASSEMBLER_H + +#include "../../MCInst.h" + +typedef struct etca_info { + cs_etca op; +} etca_info; + +bool Etca_getInstruction(csh ud, const uint8_t *code, size_t code_len, + MCInst *instr, uint16_t *size, uint64_t address, + void *info); + +void Etca_reg_access(const cs_insn *insn, cs_regs regs_read, + uint8_t *regs_read_count, cs_regs regs_write, + uint8_t *regs_write_count); +#endif diff --git a/arch/Etca/EtcaInstPrinter.c b/arch/Etca/EtcaInstPrinter.c new file mode 100644 index 0000000000..483d76b463 --- /dev/null +++ b/arch/Etca/EtcaInstPrinter.c @@ -0,0 +1,515 @@ +/* Capstone Disassembly Engine */ +/* By Alexander Nutz, 2025 */ + +#include
"EtcaDisassembler.h" +#include "capstone/etca.h" +#include "../../Mapping.h" +#include "EtcaInstPrinter.h" + +const char *Etca_reg_name(csh handle, unsigned int reg) +{ +#ifdef CAPSTONE_DIET + return NULL; +#else + if (reg >= ETCA_REG_ENDING) + reg = ETCA_REG_INVALID; + + switch ((cs_etca_reg)reg) { + case ETCA_REG_INVALID: + case ETCA_REG_ENDING: + return ""; + + case ETCA_REG_R0: + return "r0"; + case ETCA_REG_R1: + return "r1"; + case ETCA_REG_R2: + return "r2"; + case ETCA_REG_R3: + return "r3"; + case ETCA_REG_R4: + return "r4"; + case ETCA_REG_R5: + return "r5"; + case ETCA_REG_R6: + return "r6"; + case ETCA_REG_R7: + return "r7"; + + case ETCA_REG_R8: + return "r8"; + case ETCA_REG_R9: + return "r9"; + case ETCA_REG_R10: + return "r10"; + case ETCA_REG_R11: + return "r11"; + case ETCA_REG_R12: + return "r12"; + case ETCA_REG_R13: + return "r13"; + case ETCA_REG_R14: + return "r14"; + case ETCA_REG_R15: + return "r15"; + } +#endif +} + +void Etca_get_insn_id(cs_struct *h, cs_insn *insn, unsigned int id) +{ + insn->id = id; // These id's matches for etca +} + +const char *Etca_insn_name(csh handle, unsigned int id) +{ +#ifdef CAPSTONE_DIET + return NULL; +#else + if (id >= ETCA_INS_ENDING) + id = ETCA_INS_INVALID; + + switch ((etca_insn)id) { + case ETCA_INS_INVALID: + case ETCA_INS_ENDING: + return ""; + + case ETCA_INS_NOP: + return "nop"; + + case ETCA_INS_REL_JMP: + return "rel_jmp"; + case ETCA_INS_ABS_JMP: + return "abs_jmp"; + case ETCA_INS_REL_CALL: + return "rel_call"; + case ETCA_INS_ABS_CALL: + return "abs_call"; + + case ETCA_INS_ADD: + return "add"; + case ETCA_INS_SUB: + return "sub"; + case ETCA_INS_RSUB: + return "rsub"; + case ETCA_INS_CMP: + return "cmp"; + case ETCA_INS_OR: + return "or"; + case ETCA_INS_XOR: + return "xor"; + case ETCA_INS_AND: + return "and"; + case ETCA_INS_TEST: + return "test"; + case ETCA_INS_MOVZ: + return "movz"; + case ETCA_INS_MOVS: + return "movs"; + case ETCA_INS_LOAD: + return "load"; + case ETCA_INS_STORE: + 
return "store"; + case ETCA_INS_SLO: + return "slo"; + + case ETCA_INS_READCR: + return "readcr"; + case ETCA_INS_WRITECR: + return "writecr"; + case ETCA_INS_SYSCALL: + return "syscall"; + case ETCA_INS_ERET: + return "eret"; + case ETCA_INS_WAIT: + return "wait"; + + case ETCA_INS_PUSH: + return "push"; + case ETCA_INS_POP: + return "pop"; + case ETCA_INS_LEA: + return "lea"; + case ETCA_INS_ADC: + return "adc"; + case ETCA_INS_SBB: + return "sbb"; + case ETCA_INS_RSBB: + return "rsbb"; + case ETCA_INS_ASR: + return "asr"; + case ETCA_INS_ROL: + return "rol"; + case ETCA_INS_ROR: + return "ror"; + case ETCA_INS_SHL: + return "shl"; + case ETCA_INS_SHR: + return "shr"; + case ETCA_INS_RCL: + return "rcl"; + case ETCA_INS_RCR: + return "rcr"; + case ETCA_INS_POPCNT: + return "popcnt"; + case ETCA_INS_GREV: + return "grev"; + case ETCA_INS_CTZ: + return "ctz"; + case ETCA_INS_CLZ: + return "clz"; + case ETCA_INS_NOT: + return "not"; + case ETCA_INS_ANDN: + return "andn"; + case ETCA_INS_UDIV: + return "udiv"; + case ETCA_INS_SDIV: + return "sdiv"; + case ETCA_INS_UREM: + return "urem"; + case ETCA_INS_SREM: + return "srem"; + case ETCA_INS_UMUL: + return "umul"; + case ETCA_INS_SMUL: + return "smul"; + case ETCA_INS_UHMUL: + return "uhmul"; + case ETCA_INS_SHMUL: + return "shmul"; + case ETCA_INS_LSB: + return "lsb"; + case ETCA_INS_LSBMSK: + return "lsmsk"; + case ETCA_INS_RLSB: + return "rlsb"; + case ETCA_INS_ZHIB: + return "zhib"; + + case ETCA_INS_CACHE_FLUSH_ALL: + return "cache_flush_all"; + case ETCA_INS_DATA_PREFETCH: + return "data_prefetch"; + case ETCA_INS_INSTRUCTION_PREFETCH: + return "instruction_prefetch"; + case ETCA_INS_DCACHE_FLUSH: + return "dcache_flush"; + case ETCA_INS_ICACHE_INVALIDATE: + return "icache_invalidate"; + case ETCA_INS_CACHE_INVALIDATE_ALL: + return "cache_invalidate_all"; + case ETCA_INS_DCACHE_INVALIDATE: + return "dcache_invalidate"; + case ETCA_INS_ALLOC_ZERO: + return "alloc_zero"; + } +#endif +} + +char const 
*cs_etca_cond_name(uint8_t cond) +{ + switch (cond) { + case ETCA_COND_Z: + return "z"; + case ETCA_COND_N: + return "n"; + case ETCA_COND_C: + return "c"; + case ETCA_COND_O: + return "o"; + case ETCA_COND_BE: + return "be"; + case ETCA_COND_L: + return "l"; + case ETCA_COND_LE: + return "le"; + case ETCA_COND_ALWAYS: + return "always"; + + case ETCA_COND_NZ: + return "nz"; + case ETCA_COND_NN: + return "nn"; + case ETCA_COND_NC: + return "nc"; + case ETCA_COND_NO: + return "no"; + case ETCA_COND_A: + return "a"; + case ETCA_COND_GE: + return "ge"; + case ETCA_COND_G: + return "g"; + case ETCA_COND_NEVER: + return "never"; + + default: + return NULL; /* unknown condition code */ + } +} + +char const *cs_etca_cr_name(cs_etca_cr cr) +{ + switch (cr) { + case ETCA_CR_CPUID1: + return "cpuid1"; + case ETCA_CR_CPUID2: + return "cpuid2"; + case ETCA_CR_FEAT: + return "feat"; + case ETCA_CR_FLAGS: + return "flags"; + case ETCA_CR_INT_PC: + return "int_pc"; + case ETCA_CR_INT_RET_PC: + return "int_ret_pc"; + case ETCA_CR_INT_MASK: + return "int_mask"; + case ETCA_CR_INT_PENDING: + return "int_pending"; + case ETCA_CR_INT_CAUSE: + return "int_cause"; + case ETCA_CR_INT_DATA: + return "int_data"; + case ETCA_CR_INT_SCRATCH_0: + return "int_scratch_0"; + case ETCA_CR_INT_SCRATCH_1: + return "int_scratch_1"; + case ETCA_CR_PRIV: + return "priv"; + case ETCA_CR_INT_RET_PRIV: + return "int_ret_priv"; + case ETCA_CR_CACHE_LINE_SIZE: + return "cache_line_size"; + case ETCA_CR_NO_CACHE_START: + return "no_cache_start"; + case ETCA_CR_NO_CACHE_END: + return "no_cache_end"; + case ETCA_CR_MODE: + return "mode"; + } + return NULL; +} + +#ifndef CAPSTONE_DIET +static void printReg(SStream *O, cs_etca_reg reg) +{ + SStream_concat1(O, '%'); + SStream_concat0(O, Etca_reg_name(0, reg)); +} +#endif + +#ifndef CAPSTONE_DIET +static void printMemOp(SStream *O, cs_etca_op_mem *op) +{ + bool first = !op->base.enabled; /* a printed base is the first term, so later terms need " + " */ + SStream_concat1(O, '['); + + if (op->base.enabled) { + printReg(O, op->base.base); + } + + if (op->index.enabled) {
+ if (!first) { + SStream_concat0(O, " + "); + } + first = false; + + printUInt8(O, 1 << op->index.index_multiplier_log2); + SStream_concat1(O, '*'); + printReg(O, op->index.index); + } + + if (op->displacement) { + if (!first) { + SStream_concat0(O, " + "); + } + first = false; + + printInt32(O, op->displacement); + } + + SStream_concat1(O, ']'); +} +#endif + +#ifndef CAPSTONE_DIET +static void printOp(SStream *O, cs_etca_op *op, etca_insn insn) +{ + switch (op->type) { + case ETCA_OP_INVALID: + SStream_concat0(O, ""); + break; + + case ETCA_OP_REG: + printReg(O, op->reg); + break; + + case ETCA_OP_IMM: + if ((int64_t)op->imm < 0 && + (insn == ETCA_INS_REL_JMP || insn == ETCA_INS_REL_CALL || + (int64_t)op->imm >= -63)) { + SStream_concat1(O, '-'); + printUInt64(O, -op->imm); + } else { + printUInt64(O, op->imm); + } + break; + + case ETCA_OP_MEM: + printMemOp(O, &op->mem); + break; + } +} +#endif + +#ifndef CAPSTONE_DIET +static bool isSizedInsn(etca_insn insn) +{ + switch (insn) { + case ETCA_INS_INVALID: + case ETCA_INS_NOP: + case ETCA_INS_ENDING: + case ETCA_INS_REL_JMP: + case ETCA_INS_ABS_JMP: + case ETCA_INS_REL_CALL: + case ETCA_INS_ABS_CALL: + case ETCA_INS_ERET: + case ETCA_INS_SYSCALL: + case ETCA_INS_WAIT: + case ETCA_INS_CACHE_FLUSH_ALL: + case ETCA_INS_DATA_PREFETCH: + case ETCA_INS_INSTRUCTION_PREFETCH: + case ETCA_INS_DCACHE_FLUSH: + case ETCA_INS_ICACHE_INVALIDATE: + case ETCA_INS_CACHE_INVALIDATE_ALL: + case ETCA_INS_DCACHE_INVALIDATE: + case ETCA_INS_ALLOC_ZERO: + return false; + + case ETCA_INS_ADD: + case ETCA_INS_SUB: + case ETCA_INS_RSUB: + case ETCA_INS_CMP: + case ETCA_INS_OR: + case ETCA_INS_XOR: + case ETCA_INS_AND: + case ETCA_INS_TEST: + case ETCA_INS_MOVZ: + case ETCA_INS_MOVS: + case ETCA_INS_LOAD: + case ETCA_INS_STORE: + case ETCA_INS_SLO: + case ETCA_INS_READCR: + case ETCA_INS_WRITECR: + case ETCA_INS_PUSH: + case ETCA_INS_POP: + case ETCA_INS_LEA: + case ETCA_INS_ADC: + case ETCA_INS_SBB: + case ETCA_INS_RSBB: + case 
ETCA_INS_ASR: + case ETCA_INS_ROL: + case ETCA_INS_ROR: + case ETCA_INS_SHL: + case ETCA_INS_SHR: + case ETCA_INS_RCL: + case ETCA_INS_RCR: + case ETCA_INS_POPCNT: + case ETCA_INS_GREV: + case ETCA_INS_CTZ: + case ETCA_INS_CLZ: + case ETCA_INS_NOT: + case ETCA_INS_ANDN: + case ETCA_INS_UDIV: + case ETCA_INS_SDIV: + case ETCA_INS_UREM: + case ETCA_INS_SREM: + case ETCA_INS_UMUL: + case ETCA_INS_SMUL: + case ETCA_INS_UHMUL: + case ETCA_INS_SHMUL: + case ETCA_INS_LSB: + case ETCA_INS_LSBMSK: + case ETCA_INS_RLSB: + case ETCA_INS_ZHIB: + return true; + } +} +#endif + +void Etca_printInst(MCInst *MI, SStream *O, void *infoIn) +{ +#ifndef CAPSTONE_DIET + etca_info *info = (etca_info *)infoIn; + + // first word in buffer has to be mnemonic because of SStream_extract_mnem_opstr + SStream_concat0(O, Etca_insn_name(0, info->op.insn)); + + if (info->op.cond != ETCA_COND_ALWAYS) { + SStream_concat(O, " when %s, ", + cs_etca_cond_name(info->op.cond)); + } + + if (isSizedInsn(info->op.insn)) { + char s = "hxdq"[info->op.ss & 3]; + SStream_concat1(O, s); + } + + int numPrinted = 0; + for (int i = 0; i < info->op.op_count; i++) { + /* don't print sp reg if it's the default */ + if (i == 1 && info->op.insn == ETCA_INS_POP && + info->op.operands[1].type == ETCA_OP_REG && + info->op.operands[1].reg == ETCA_REG_R6) + continue; + if (i == 0 && info->op.insn == ETCA_INS_PUSH && + info->op.operands[0].type == ETCA_OP_REG && + info->op.operands[0].reg == ETCA_REG_R6) + continue; + + if (numPrinted != 0) { + SStream_concat0(O, ","); + } + SStream_concat0(O, " "); + + cs_etca_op *op = &info->op.operands[i]; + + char const *crname; + if (i == 1 && + (info->op.insn == ETCA_INS_READCR || + info->op.insn == ETCA_INS_WRITECR) && + (crname = cs_etca_cr_name(op->imm))) { + SStream_concat0(O, crname); + } else { + printOp(O, op, info->op.insn); + } + + numPrinted++; + } +#endif +} + +const char *Etca_group_name(csh handle, unsigned int id) +{ +#ifndef CAPSTONE_DIET + if (id >= ETCA_GRP_ENDING) + id 
= ETCA_GRP_ENDING; + + switch ((cs_etca_insn_group)id) { + case ETCA_GRP_ENDING: + case ETCA_GRP_INVALID: + return ""; + + case ETCA_GRP_JUMP: + return "jump"; + case ETCA_GRP_CALL: + return "call"; + case ETCA_GRP_PRIV: + return "privileged"; + } +#else + return NULL; +#endif +} diff --git a/arch/Etca/EtcaInstPrinter.h b/arch/Etca/EtcaInstPrinter.h new file mode 100644 index 0000000000..41e407e486 --- /dev/null +++ b/arch/Etca/EtcaInstPrinter.h @@ -0,0 +1,22 @@ +/* Capstone Disassembly Engine */ +/* By Alexander Nutz, 2025 */ + +#ifndef CS_ETCA_INSTPRINTER_H +#define CS_ETCA_INSTPRINTER_H + +#include "capstone/capstone.h" +#include "../../utils.h" +#include "../../MCInst.h" +#include "../../SStream.h" +#include "../../cs_priv.h" +#include "EtcaDisassembler.h" + +struct SStream; + +void Etca_printInst(MCInst *MI, struct SStream *O, void *Info); +const char *Etca_reg_name(csh handle, unsigned int reg); +void Etca_get_insn_id(cs_struct *h, cs_insn *insn, unsigned int id); +const char *Etca_insn_name(csh handle, unsigned int id); +const char *Etca_group_name(csh handle, unsigned int id); + +#endif diff --git a/arch/Etca/EtcaModule.c b/arch/Etca/EtcaModule.c new file mode 100644 index 0000000000..3f25468e1a --- /dev/null +++ b/arch/Etca/EtcaModule.c @@ -0,0 +1,41 @@ +/* Capstone Disassembly Engine */ +/* By Alexander Nutz, 2025 */ + +#ifdef CAPSTONE_HAS_ETCA + +#include "../../cs_priv.h" +#include "EtcaDisassembler.h" +#include "EtcaInstPrinter.h" +#include "EtcaModule.h" + +cs_err Etca_global_init(cs_struct *ud) +{ + etca_info *info; + + info = cs_mem_malloc(sizeof(etca_info)); + if (!info) { + return CS_ERR_MEM; + } + + ud->printer = Etca_printInst; + ud->printer_info = info; + ud->getinsn_info = info; + ud->reg_name = Etca_reg_name; + ud->insn_id = Etca_get_insn_id; + ud->insn_name = Etca_insn_name; + ud->group_name = Etca_group_name; + ud->disasm = Etca_getInstruction; + ud->post_printer = NULL; +#ifndef CAPSTONE_DIET + ud->reg_access = Etca_reg_access; +#endif + + 
return CS_ERR_OK; +} + +cs_err Etca_option(cs_struct *handle, cs_opt_type type, size_t value) +{ + return CS_ERR_OK; +} + +#endif diff --git a/arch/Etca/EtcaModule.h b/arch/Etca/EtcaModule.h new file mode 100644 index 0000000000..b91669d57c --- /dev/null +++ b/arch/Etca/EtcaModule.h @@ -0,0 +1,12 @@ +/* Capstone Disassembly Engine */ +/* By Alexander Nutz, 2025 */ + +#ifndef CS_ETCA_MODULE_H +#define CS_ETCA_MODULE_H + +#include "../../utils.h" + +cs_err Etca_global_init(cs_struct *ud); +cs_err Etca_option(cs_struct *handle, cs_opt_type type, size_t value); + +#endif diff --git a/bindings/const_generator.py b/bindings/const_generator.py index b2f9cefebc..b626822886 100644 --- a/bindings/const_generator.py +++ b/bindings/const_generator.py @@ -8,7 +8,7 @@ include = ['arm.h', 'aarch64.h', 'm68k.h', 'mips.h', 'x86.h', 'ppc.h', 'sparc.h', 'systemz.h', 'xcore.h', 'tms320c64x.h', 'm680x.h', 'evm.h', 'mos65xx.h', 'wasm.h', 'bpf.h', 'riscv.h', 'sh.h', 'tricore.h', - 'alpha.h', 'hppa.h', 'loongarch.h', 'arc.h', 'xtensa.h'] + 'alpha.h', 'hppa.h', 'loongarch.h', 'arc.h', 'xtensa.h', 'etca.h'] template = { 'java': { @@ -28,6 +28,7 @@ 'tms320c64x.h': 'TMS320C64x', 'm680x.h': 'M680x', 'evm.h': 'Evm', + 'etca.h': 'Etca', 'wasm.h': 'Wasm', 'comment_open': '\t//', 'comment_close': '', diff --git a/cs.c b/cs.c index 80ade811fb..9bd285da35 100644 --- a/cs.c +++ b/cs.c @@ -78,6 +78,7 @@ #include "arch/LoongArch/LoongArchModule.h" #include "arch/Xtensa/XtensaModule.h" #include "arch/ARC/ARCModule.h" +#include "arch/Etca/EtcaModule.h" typedef struct cs_arch_config { // constructor initialization @@ -91,155 +92,183 @@ typedef struct cs_arch_config { #define CS_ARCH_CONFIG_ARM \ { \ - ARM_global_init, ARM_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_ARM | CS_MODE_V8 | \ - CS_MODE_MCLASS | CS_MODE_THUMB | \ - CS_MODE_BIG_ENDIAN), \ + ARM_global_init, \ + ARM_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_ARM | CS_MODE_V8 | \ + CS_MODE_MCLASS | CS_MODE_THUMB | CS_MODE_BIG_ENDIAN), \ } 
#define CS_ARCH_CONFIG_AARCH64 \ { \ - AArch64_global_init, AArch64_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_ARM | \ - CS_MODE_BIG_ENDIAN | CS_MODE_APPLE_PROPRIETARY), \ + AArch64_global_init, \ + AArch64_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_ARM | CS_MODE_BIG_ENDIAN | \ + CS_MODE_APPLE_PROPRIETARY), \ } #define CS_ARCH_CONFIG_MIPS \ { \ - Mips_global_init, Mips_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BIG_ENDIAN | \ - CS_MODE_MIPS16 | CS_MODE_MIPS32 | CS_MODE_MIPS64 | \ - CS_MODE_MICRO | CS_MODE_MIPS1 | CS_MODE_MIPS2 | \ - CS_MODE_MIPS32R2 | CS_MODE_MIPS32R3 | \ - CS_MODE_MIPS32R5 | CS_MODE_MIPS32R6 | \ - CS_MODE_MIPS3 | CS_MODE_MIPS4 | CS_MODE_MIPS5 | \ - CS_MODE_MIPS64R2 | CS_MODE_MIPS64R3 | \ - CS_MODE_MIPS64R5 | CS_MODE_MIPS64R6 | \ - CS_MODE_OCTEON | CS_MODE_OCTEONP | \ - CS_MODE_NANOMIPS | CS_MODE_NMS1 | CS_MODE_I7200 | \ - CS_MODE_MIPS_NOFLOAT | CS_MODE_MIPS_PTR64), \ + Mips_global_init, \ + Mips_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BIG_ENDIAN | \ + CS_MODE_MIPS16 | CS_MODE_MIPS32 | CS_MODE_MIPS64 | \ + CS_MODE_MICRO | CS_MODE_MIPS1 | CS_MODE_MIPS2 | \ + CS_MODE_MIPS32R2 | CS_MODE_MIPS32R3 | CS_MODE_MIPS32R5 | \ + CS_MODE_MIPS32R6 | CS_MODE_MIPS3 | CS_MODE_MIPS4 | \ + CS_MODE_MIPS5 | CS_MODE_MIPS64R2 | CS_MODE_MIPS64R3 | \ + CS_MODE_MIPS64R5 | CS_MODE_MIPS64R6 | CS_MODE_OCTEON | \ + CS_MODE_OCTEONP | CS_MODE_NANOMIPS | CS_MODE_NMS1 | \ + CS_MODE_I7200 | CS_MODE_MIPS_NOFLOAT | CS_MODE_MIPS_PTR64), \ } #define CS_ARCH_CONFIG_X86 \ { \ - X86_global_init, X86_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_32 | CS_MODE_64 | \ - CS_MODE_16), \ + X86_global_init, \ + X86_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_32 | CS_MODE_64 | \ + CS_MODE_16), \ } #define CS_ARCH_CONFIG_PPC \ { \ - PPC_global_init, PPC_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_32 | CS_MODE_64 | \ - CS_MODE_BIG_ENDIAN | CS_MODE_QPX | CS_MODE_PS | \ - CS_MODE_BOOKE | CS_MODE_SPE | CS_MODE_AIX_OS | \ - CS_MODE_PWR7 | CS_MODE_PWR8 | CS_MODE_PWR9 | \ - CS_MODE_PWR10 
| CS_MODE_PPC_ISA_FUTURE | \ - CS_MODE_MSYNC | CS_MODE_MODERN_AIX_AS), \ + PPC_global_init, \ + PPC_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_32 | CS_MODE_64 | \ + CS_MODE_BIG_ENDIAN | CS_MODE_QPX | CS_MODE_PS | \ + CS_MODE_BOOKE | CS_MODE_SPE | CS_MODE_AIX_OS | \ + CS_MODE_PWR7 | CS_MODE_PWR8 | CS_MODE_PWR9 | CS_MODE_PWR10 | \ + CS_MODE_PPC_ISA_FUTURE | CS_MODE_MSYNC | \ + CS_MODE_MODERN_AIX_AS), \ } #define CS_ARCH_CONFIG_SPARC \ { \ - Sparc_global_init, Sparc_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BIG_ENDIAN | \ - CS_MODE_V9 | CS_MODE_64 | CS_MODE_32), \ + Sparc_global_init, \ + Sparc_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BIG_ENDIAN | CS_MODE_V9 | \ + CS_MODE_64 | CS_MODE_32), \ } #define CS_ARCH_CONFIG_SYSTEMZ \ { \ - SystemZ_global_init, SystemZ_option, \ - ~(CS_MODE_BIG_ENDIAN | CS_MODE_SYSTEMZ_ARCH8 | \ - CS_MODE_SYSTEMZ_ARCH9 | CS_MODE_SYSTEMZ_ARCH10 | \ - CS_MODE_SYSTEMZ_ARCH11 | CS_MODE_SYSTEMZ_ARCH12 | \ - CS_MODE_SYSTEMZ_ARCH13 | CS_MODE_SYSTEMZ_ARCH14 | \ - CS_MODE_SYSTEMZ_Z10 | CS_MODE_SYSTEMZ_Z196 | \ - CS_MODE_SYSTEMZ_ZEC12 | CS_MODE_SYSTEMZ_Z13 | \ - CS_MODE_SYSTEMZ_Z14 | CS_MODE_SYSTEMZ_Z15 | \ - CS_MODE_SYSTEMZ_Z16 | CS_MODE_SYSTEMZ_GENERIC), \ + SystemZ_global_init, \ + SystemZ_option, \ + ~(CS_MODE_BIG_ENDIAN | CS_MODE_SYSTEMZ_ARCH8 | \ + CS_MODE_SYSTEMZ_ARCH9 | CS_MODE_SYSTEMZ_ARCH10 | \ + CS_MODE_SYSTEMZ_ARCH11 | CS_MODE_SYSTEMZ_ARCH12 | \ + CS_MODE_SYSTEMZ_ARCH13 | CS_MODE_SYSTEMZ_ARCH14 | \ + CS_MODE_SYSTEMZ_Z10 | CS_MODE_SYSTEMZ_Z196 | \ + CS_MODE_SYSTEMZ_ZEC12 | CS_MODE_SYSTEMZ_Z13 | \ + CS_MODE_SYSTEMZ_Z14 | CS_MODE_SYSTEMZ_Z15 | \ + CS_MODE_SYSTEMZ_Z16 | CS_MODE_SYSTEMZ_GENERIC), \ } #define CS_ARCH_CONFIG_XCORE \ { \ - XCore_global_init, XCore_option, ~(CS_MODE_BIG_ENDIAN), \ + XCore_global_init, \ + XCore_option, \ + ~(CS_MODE_BIG_ENDIAN), \ } #define CS_ARCH_CONFIG_M68K \ { \ - M68K_global_init, M68K_option, \ - ~(CS_MODE_BIG_ENDIAN | CS_MODE_M68K_000 | \ - CS_MODE_M68K_010 | CS_MODE_M68K_020 | \ - CS_MODE_M68K_030 
| CS_MODE_M68K_040 | \ - CS_MODE_M68K_060), \ + M68K_global_init, \ + M68K_option, \ + ~(CS_MODE_BIG_ENDIAN | CS_MODE_M68K_000 | CS_MODE_M68K_010 | \ + CS_MODE_M68K_020 | CS_MODE_M68K_030 | CS_MODE_M68K_040 | \ + CS_MODE_M68K_060), \ } #define CS_ARCH_CONFIG_TMS320C64X \ { \ - TMS320C64x_global_init, TMS320C64x_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BIG_ENDIAN), \ + TMS320C64x_global_init, \ + TMS320C64x_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BIG_ENDIAN), \ } #define CS_ARCH_CONFIG_M680X \ { \ - M680X_global_init, M680X_option, \ - ~(CS_MODE_M680X_6301 | CS_MODE_M680X_6309 | \ - CS_MODE_M680X_6800 | CS_MODE_M680X_6801 | \ - CS_MODE_M680X_6805 | CS_MODE_M680X_6808 | \ - CS_MODE_M680X_6809 | CS_MODE_M680X_6811 | \ - CS_MODE_M680X_CPU12 | CS_MODE_M680X_HCS08), \ + M680X_global_init, \ + M680X_option, \ + ~(CS_MODE_M680X_6301 | CS_MODE_M680X_6309 | \ + CS_MODE_M680X_6800 | CS_MODE_M680X_6801 | \ + CS_MODE_M680X_6805 | CS_MODE_M680X_6808 | \ + CS_MODE_M680X_6809 | CS_MODE_M680X_6811 | \ + CS_MODE_M680X_CPU12 | CS_MODE_M680X_HCS08), \ } #define CS_ARCH_CONFIG_EVM \ { \ - EVM_global_init, EVM_option, 0, \ + EVM_global_init, \ + EVM_option, \ + 0, \ } #define CS_ARCH_CONFIG_MOS65XX \ { \ - MOS65XX_global_init, MOS65XX_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_MOS65XX_6502 | \ - CS_MODE_MOS65XX_65C02 | CS_MODE_MOS65XX_W65C02 | \ - CS_MODE_MOS65XX_65816_LONG_MX), \ + MOS65XX_global_init, \ + MOS65XX_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_MOS65XX_6502 | \ + CS_MODE_MOS65XX_65C02 | CS_MODE_MOS65XX_W65C02 | \ + CS_MODE_MOS65XX_65816_LONG_MX), \ } #define CS_ARCH_CONFIG_WASM \ { \ - WASM_global_init, WASM_option, 0, \ + WASM_global_init, \ + WASM_option, \ + 0, \ } #define CS_ARCH_CONFIG_BPF \ { \ - BPF_global_init, BPF_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BPF_CLASSIC | \ - CS_MODE_BPF_EXTENDED | CS_MODE_BIG_ENDIAN), \ + BPF_global_init, \ + BPF_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BPF_CLASSIC | \ + CS_MODE_BPF_EXTENDED | 
CS_MODE_BIG_ENDIAN), \
 	}
 #define CS_ARCH_CONFIG_RISCV \
 	{ \
-		RISCV_global_init, RISCV_option, \
-		~(CS_MODE_RISCV32 | CS_MODE_RISCV64 | CS_MODE_RISCVC), \
+		RISCV_global_init, \
+		RISCV_option, \
+		~(CS_MODE_RISCV32 | CS_MODE_RISCV64 | CS_MODE_RISCVC), \
 	}
 #define CS_ARCH_CONFIG_SH \
 	{ \
-		SH_global_init, SH_option, \
-		~(CS_MODE_SH2 | CS_MODE_SH2A | CS_MODE_SH3 | \
-		  CS_MODE_SH4 | CS_MODE_SH4A | CS_MODE_SHFPU | \
-		  CS_MODE_SHDSP | CS_MODE_BIG_ENDIAN), \
+		SH_global_init, \
+		SH_option, \
+		~(CS_MODE_SH2 | CS_MODE_SH2A | CS_MODE_SH3 | CS_MODE_SH4 | \
+		  CS_MODE_SH4A | CS_MODE_SHFPU | CS_MODE_SHDSP | \
+		  CS_MODE_BIG_ENDIAN), \
 	}
 #define CS_ARCH_CONFIG_TRICORE \
 	{ \
-		TRICORE_global_init, TRICORE_option, \
-		~(CS_MODE_TRICORE_110 | CS_MODE_TRICORE_120 | \
-		  CS_MODE_TRICORE_130 | CS_MODE_TRICORE_131 | \
-		  CS_MODE_TRICORE_160 | CS_MODE_TRICORE_161 | \
-		  CS_MODE_TRICORE_162 | CS_MODE_TRICORE_180 | \
-		  CS_MODE_LITTLE_ENDIAN), \
+		TRICORE_global_init, \
+		TRICORE_option, \
+		~(CS_MODE_TRICORE_110 | CS_MODE_TRICORE_120 | \
+		  CS_MODE_TRICORE_130 | CS_MODE_TRICORE_131 | \
+		  CS_MODE_TRICORE_160 | CS_MODE_TRICORE_161 | \
+		  CS_MODE_TRICORE_162 | CS_MODE_TRICORE_180 | \
+		  CS_MODE_LITTLE_ENDIAN), \
+	}
+#define CS_ARCH_CONFIG_ETCA /* mask = ~(accepted mode bits), like the other entries */ \
+	{ \
+		Etca_global_init, \
+		Etca_option, \
+		~(CS_MODE_LITTLE_ENDIAN | CS_MODE_16 | CS_MODE_32 | CS_MODE_64), \
 	}
 #define CS_ARCH_CONFIG_ALPHA \
 	{ \
-		ALPHA_global_init, ALPHA_option, \
-		~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BIG_ENDIAN), \
+		ALPHA_global_init, \
+		ALPHA_option, \
+		~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BIG_ENDIAN), \
 	}
 #define CS_ARCH_CONFIG_LOONGARCH \
 	{ \
-		LoongArch_global_init, LoongArch_option, \
-		~(CS_MODE_LITTLE_ENDIAN | CS_MODE_LOONGARCH32 | \
-		  CS_MODE_LOONGARCH64), \
+		LoongArch_global_init, \
+		LoongArch_option, \
+		~(CS_MODE_LITTLE_ENDIAN | CS_MODE_LOONGARCH32 | \
+		  CS_MODE_LOONGARCH64), \
 	}
 #define CS_ARCH_CONFIG_XTENSA \
 	{ \
-		Xtensa_global_init, Xtensa_option, \
-		~(CS_MODE_XTENSA_ESP32 | CS_MODE_XTENSA_ESP32S2 | \
-		  CS_MODE_XTENSA_ESP8266), \
+		Xtensa_global_init, \
+
Xtensa_option, \ + ~(CS_MODE_XTENSA_ESP32 | CS_MODE_XTENSA_ESP32S2 | \ + CS_MODE_XTENSA_ESP8266), \ } #define CS_ARCH_CONFIG_ARC \ { \ - ARC_global_init, ARC_option, ~(CS_MODE_LITTLE_ENDIAN), \ + ARC_global_init, \ + ARC_option, \ + ~(CS_MODE_LITTLE_ENDIAN), \ } #ifdef CAPSTONE_USE_ARCH_REGISTRATION @@ -367,6 +396,11 @@ static const cs_arch_config arch_configs[MAX_ARCH] = { #else { NULL, NULL, 0 }, #endif +#ifdef CAPSTONE_HAS_ETCA + CS_ARCH_CONFIG_ETCA, +#else + { NULL, NULL, 0 }, +#endif }; // bitmask of enabled architectures @@ -422,6 +456,9 @@ static const uint32_t all_arch = 0 #ifdef CAPSTONE_HAS_SH | (1 << CS_ARCH_SH) #endif +#ifdef CAPSTONE_HAS_ETCA + | (1 << CS_ARCH_ETCA) +#endif #ifdef CAPSTONE_HAS_TRICORE | (1 << CS_ARCH_TRICORE) #endif @@ -690,7 +727,7 @@ bool CAPSTONE_API cs_support(int query) (1 << CS_ARCH_SH) | (1 << CS_ARCH_TRICORE) | (1 << CS_ARCH_ALPHA) | (1 << CS_ARCH_HPPA) | (1 << CS_ARCH_LOONGARCH) | (1 << CS_ARCH_XTENSA) | - (1 << CS_ARCH_ARC)); + (1 << CS_ARCH_ARC) | (1 << CS_ARCH_ETCA)); if ((unsigned int)query < CS_ARCH_MAX) return all_arch & (1 << query); @@ -999,6 +1036,8 @@ static uint8_t skipdata_size(cs_struct *handle) return 4; case CS_ARCH_SH: return 2; + case CS_ARCH_ETCA: + return 1; case CS_ARCH_TRICORE: // TriCore instruction's length can be 2 or 4 bytes, // so we just skip 2 bytes @@ -1783,6 +1822,12 @@ int CAPSTONE_API cs_op_count(csh ud, const cs_insn *insn, unsigned int op_type) (mos65xx_op_type)op_type) count++; break; + case CS_ARCH_ETCA: + for (i = 0; i < insn->detail->etca.op_count; i++) + if (insn->detail->etca.operands[i].type == + (cs_etca_op_type)op_type) + count++; + break; case CS_ARCH_WASM: for (i = 0; i < insn->detail->wasm.op_count; i++) if (insn->detail->wasm.operands[i].type == @@ -2031,6 +2076,15 @@ int CAPSTONE_API cs_op_index(csh ud, const cs_insn *insn, unsigned int op_type, return i; } break; + case CS_ARCH_ETCA: + for (i = 0; i < insn->detail->etca.op_count; i++) { + if (insn->detail->etca.operands[i].type 
==
+			    (cs_etca_op_type)op_type)
+				count++;
+			if (count == post)
+				return i;
+		}
+		break;
 	case CS_ARCH_ALPHA:
 		for (i = 0; i < insn->detail->alpha.op_count; i++) {
 			if (insn->detail->alpha.operands[i].type ==
diff --git a/cstool/cstool.c b/cstool/cstool.c
index c341f7fb0f..2cee740c8d 100644
--- a/cstool/cstool.c
+++ b/cstool/cstool.c
@@ -339,6 +339,8 @@ static struct {
 
 	{ "evm", "ethereum virtual machine", CS_ARCH_EVM, 0 },
 
+	{ "etca", "ETC.a", CS_ARCH_ETCA, 0 },
+
 	{ "wasm", "web assembly", CS_ARCH_WASM, 0 },
 
 	{ "bpf", "Classic BPF, little endian", CS_ARCH_BPF,
@@ -504,6 +506,8 @@ static const char *get_arch_name(cs_arch arch)
 		return "M680X";
 	case CS_ARCH_EVM:
 		return "Evm";
+	case CS_ARCH_ETCA:
+		return "Etca";
 	case CS_ARCH_MOS65XX:
 		return "MOS65XX";
 	case CS_ARCH_WASM:
@@ -620,6 +624,9 @@ static void print_details(csh handle, cs_arch arch, cs_mode md, cs_insn *ins)
 	case CS_ARCH_EVM:
 		print_insn_detail_evm(handle, ins);
 		break;
+	case CS_ARCH_ETCA:
+		print_insn_detail_etca(handle, ins);
+		break;
 	case CS_ARCH_WASM:
 		print_insn_detail_wasm(handle, ins);
 		break;
@@ -803,6 +810,10 @@ int main(int argc, char **argv)
 				printf("evm=1 ");
 			}
 
+			if (cs_support(CS_ARCH_ETCA)) {
+				printf("etca=1 ");
+			}
+
 			if (cs_support(CS_ARCH_WASM)) {
 				printf("wasm=1 ");
 			}
diff --git a/cstool/cstool.h b/cstool/cstool.h
index 2d005bc9a0..9d42c99f8a 100644
--- a/cstool/cstool.h
+++ b/cstool/cstool.h
@@ -24,5 +24,6 @@ void print_insn_detail_hppa(csh handle, cs_insn *ins);
 void print_insn_detail_loongarch(csh handle, cs_insn *ins);
 void print_insn_detail_xtensa(csh handle, cs_insn *ins);
 void print_insn_detail_arc(csh handle, cs_insn *ins);
+void print_insn_detail_etca(csh handle, cs_insn *ins);
 
 #endif //CAPSTONE_CSTOOL_CSTOOL_H_
diff --git a/cstool/cstool_etca.c b/cstool/cstool_etca.c
new file mode 100644
index 0000000000..82731eb475
--- /dev/null
+++ b/cstool/cstool_etca.c
@@ -0,0 +1,14 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <capstone/capstone.h>
+#include "cstool.h"
+
+void print_insn_detail_etca(csh handle, cs_insn *ins)
+{
+	//
detail can be NULL on "data" instruction if SKIPDATA option is turned ON + if (ins->detail == NULL) + return; + + // TODO +} diff --git a/include/capstone/capstone.h b/include/capstone/capstone.h index f8d07338e1..770730cc78 100644 --- a/include/capstone/capstone.h +++ b/include/capstone/capstone.h @@ -105,6 +105,7 @@ typedef enum cs_arch { CS_ARCH_LOONGARCH, ///< LoongArch architecture CS_ARCH_XTENSA, ///< Xtensa architecture CS_ARCH_ARC, ///< ARC architecture + CS_ARCH_ETCA, ///< ETC.a architecture CS_ARCH_MAX, CS_ARCH_ALL = 0xFFFF, // All architectures - for cs_support() } cs_arch; @@ -226,6 +227,7 @@ typedef enum cs_mode { CS_MODE_SH4A = 1 << 5, ///< SH4A CS_MODE_SHFPU = 1 << 6, ///< w/ FPU CS_MODE_SHDSP = 1 << 7, ///< w/ DSP + CS_MODE_ETCA = 1 << 1, CS_MODE_TRICORE_110 = 1 << 1, ///< Tricore 1.1 CS_MODE_TRICORE_120 = 1 << 2, ///< Tricore 1.2 CS_MODE_TRICORE_130 = 1 << 3, ///< Tricore 1.3 @@ -439,6 +441,7 @@ typedef struct cs_opt_skipdata { #include "mos65xx.h" #include "bpf.h" #include "sh.h" +#include "etca.h" #include "tricore.h" #include "alpha.h" #include "hppa.h" @@ -497,6 +500,7 @@ typedef struct cs_detail { cs_bpf bpf; ///< Berkeley Packet Filter architecture (including eBPF) cs_riscv riscv; ///< RISCV architecture cs_sh sh; ///< SH architecture + cs_etca etca; ///< ETC.a architecture cs_tricore tricore; ///< TriCore architecture cs_alpha alpha; ///< Alpha architecture cs_hppa hppa; ///< HPPA architecture diff --git a/include/capstone/etca.h b/include/capstone/etca.h new file mode 100644 index 0000000000..ae85c32e85 --- /dev/null +++ b/include/capstone/etca.h @@ -0,0 +1,231 @@ +#ifndef CAPSTONE_ETCA_H +#define CAPSTONE_ETCA_H + +/* Capstone Disassembly Engine */ +/* By Alexander Nutz, 2025 */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "platform.h" +#include "cs_operand.h" + +#ifdef _MSC_VER +#pragma warning(disable : 4201) +#endif + +/// ETCA registers and special registers +typedef enum { + ETCA_REG_INVALID = 0, + + ETCA_REG_FIRST_BASE, + 
ETCA_REG_R0 = ETCA_REG_FIRST_BASE, + ETCA_REG_R1, + ETCA_REG_R2, + ETCA_REG_R3, + ETCA_REG_R4, + ETCA_REG_R5, + ETCA_REG_R6, + ETCA_REG_R7, + ETCA_REG_LAST_BASE = ETCA_REG_R7, + + ETCA_REG_FIRST_REX, + ETCA_REG_R8 = ETCA_REG_FIRST_REX, + ETCA_REG_R9, + ETCA_REG_R10, + ETCA_REG_R11, + ETCA_REG_R12, + ETCA_REG_R13, + ETCA_REG_R14, + ETCA_REG_R15, + ETCA_REG_LAST_REX = ETCA_REG_R15, + + ETCA_REG_ENDING, +} cs_etca_reg; + +typedef enum { + ETCA_OP_INVALID = CS_OP_INVALID, + ETCA_OP_REG = CS_OP_REG, // register operand + ETCA_OP_IMM = CS_OP_IMM, // (possibly full-) immediate operand + ETCA_OP_MEM = CS_OP_MEM, // only for MO1/MO2 memory operands +} cs_etca_op_type; + +// index + base + displacement +typedef struct { + // (1 << index_multiplier_log2) * reg[index] + struct { + bool enabled; + cs_etca_reg index; + // 2^0=1, 2^1=2, 2^2=4, 2^3=8 + uint8_t index_multiplier_log2 : 2; + } index; + + // reg[base] + struct { + bool enabled; + cs_etca_reg base; + } base; + + int32_t displacement; +} cs_etca_op_mem; + +// Instruction operand +typedef struct { + cs_etca_op_type type; + union { + uint64_t imm; // when ETCA_OP_IMM; after sign extensions + cs_etca_reg reg; // when ETCA_OP_REG + cs_etca_op_mem mem; // when ETCA_OP_MEM + }; +} cs_etca_op; + +typedef enum etca_insn { + ETCA_INS_INVALID = 0, + + ETCA_INS_NOP, + + ETCA_INS_REL_JMP, + ETCA_INS_ABS_JMP, + ETCA_INS_REL_CALL, + ETCA_INS_ABS_CALL, + + ETCA_INS_ADD, + ETCA_INS_SUB, + ETCA_INS_RSUB, + ETCA_INS_CMP, + ETCA_INS_OR, + ETCA_INS_XOR, + ETCA_INS_AND, + ETCA_INS_TEST, + ETCA_INS_MOVZ, + ETCA_INS_MOVS, + ETCA_INS_LOAD, + ETCA_INS_STORE, + ETCA_INS_SLO, + + ETCA_INS_READCR, + ETCA_INS_WRITECR, + ETCA_INS_SYSCALL, + ETCA_INS_ERET, + ETCA_INS_WAIT, + + ETCA_INS_PUSH, + ETCA_INS_POP, + ETCA_INS_LEA, + ETCA_INS_ADC, + ETCA_INS_SBB, + ETCA_INS_RSBB, + ETCA_INS_ASR, + ETCA_INS_ROL, + ETCA_INS_ROR, + ETCA_INS_SHL, + ETCA_INS_SHR, + ETCA_INS_RCL, + ETCA_INS_RCR, + ETCA_INS_POPCNT, + ETCA_INS_GREV, + ETCA_INS_CTZ, + ETCA_INS_CLZ, + 
ETCA_INS_NOT, + ETCA_INS_ANDN, + ETCA_INS_UDIV, + ETCA_INS_SDIV, + ETCA_INS_UREM, + ETCA_INS_SREM, + ETCA_INS_UMUL, + ETCA_INS_SMUL, + ETCA_INS_UHMUL, + ETCA_INS_SHMUL, + ETCA_INS_LSB, + ETCA_INS_LSBMSK, + ETCA_INS_RLSB, + ETCA_INS_ZHIB, + + ETCA_INS_CACHE_FLUSH_ALL, + ETCA_INS_DATA_PREFETCH, + ETCA_INS_INSTRUCTION_PREFETCH, + ETCA_INS_DCACHE_FLUSH, + ETCA_INS_ICACHE_INVALIDATE, + ETCA_INS_CACHE_INVALIDATE_ALL, + ETCA_INS_DCACHE_INVALIDATE, + ETCA_INS_ALLOC_ZERO, + + ETCA_INS_ENDING, +} etca_insn; + +#define ETCA_MAX_NUM_OP 2 + +// first bit negates the cond +#define ETCA_COND_Z (0 << 1) +#define ETCA_COND_N (1 << 1) +#define ETCA_COND_C (2 << 1) +#define ETCA_COND_O (3 << 1) +#define ETCA_COND_BE (4 << 1) +#define ETCA_COND_L (5 << 1) +#define ETCA_COND_LE (6 << 1) +#define ETCA_COND_ALWAYS (7 << 1) + +#define ETCA_COND_E ETCA_COND_Z +#define ETCA_COND_B ETCA_COND_C + +#define ETCA_COND_NZ (1 | ETCA_COND_Z) +#define ETCA_COND_NN (1 | ETCA_COND_N) +#define ETCA_COND_NC (1 | ETCA_COND_C) +#define ETCA_COND_NO (1 | ETCA_COND_O) +#define ETCA_COND_A (1 | ETCA_COND_BE) +#define ETCA_COND_GE (1 | ETCA_COND_L) +#define ETCA_COND_G (1 | ETCA_COND_LE) +#define ETCA_COND_NEVER (1 | ETCA_COND_ALWAYS) + +#define ETCA_COND_NE (1 | ETCA_COND_E) +#define ETCA_COND_AE (1 | ETCA_COND_C) + +char const *cs_etca_cond_name(uint8_t cond); + +typedef enum { + ETCA_CR_CPUID1 = 0x00, + ETCA_CR_CPUID2 = 0x01, + ETCA_CR_FEAT = 0x02, + ETCA_CR_FLAGS = 0x03, + ETCA_CR_INT_PC = 0x04, + ETCA_CR_INT_RET_PC = 0x05, + ETCA_CR_INT_MASK = 0x06, + ETCA_CR_INT_PENDING = 0x07, + ETCA_CR_INT_CAUSE = 0x08, + ETCA_CR_INT_DATA = 0x09, + ETCA_CR_INT_SCRATCH_0 = 0x0A, + ETCA_CR_INT_SCRATCH_1 = 0x0B, + ETCA_CR_PRIV = 0x0C, + ETCA_CR_INT_RET_PRIV = 0x0D, + ETCA_CR_CACHE_LINE_SIZE = 0x0E, + ETCA_CR_NO_CACHE_START = 0x0F, + ETCA_CR_NO_CACHE_END = 0x10, + ETCA_CR_MODE = 0x11, +} cs_etca_cr; + +char const *cs_etca_cr_name(cs_etca_cr cr); + +/// Instruction structure +typedef struct cs_etca { + etca_insn insn : 8; 
+ uint8_t ss : 2; + uint8_t cond : 4; + uint8_t op_count : 2; + cs_etca_op operands[ETCA_MAX_NUM_OP]; +} cs_etca; + +// instructions can have multiple groups +typedef enum { + ETCA_GRP_INVALID = 0, + ETCA_GRP_JUMP, + ETCA_GRP_CALL, + ETCA_GRP_PRIV, + ETCA_GRP_ENDING, +} cs_etca_insn_group; + +#ifdef __cplusplus +} +#endif + +#endif From 0b96b04e71023b9aea62a67538b5bcf0dcf8a28d Mon Sep 17 00:00:00 2001 From: Alexander Nutz Date: Fri, 31 Oct 2025 21:48:15 +0100 Subject: [PATCH 2/8] improve printing of jmp & call ops --- arch/Etca/EtcaDisassembler.c | 30 ++++++++++++++++++++++-------- arch/Etca/EtcaInstPrinter.c | 30 ++++++++++++++++++++++-------- 2 files changed, 44 insertions(+), 16 deletions(-) diff --git a/arch/Etca/EtcaDisassembler.c b/arch/Etca/EtcaDisassembler.c index f20baf7eee..38d023fdd3 100644 --- a/arch/Etca/EtcaDisassembler.c +++ b/arch/Etca/EtcaDisassembler.c @@ -44,6 +44,11 @@ typedef struct { bool x : 1; } pfx_rex; + struct { + bool present; + uint8_t a : 3; + } single_reg; + struct { bool present; uint8_t a : 3; @@ -289,8 +294,8 @@ static bool parseCoreOp(DecodeIsntCtx *ctx, const uint8_t **code_p, } else if (code_len >= 2 && code[0] >> 6 == 0 && (code[0] & 0xF) == 0xF && (code[1] << 3) >> 3 == 0) { ctx->insn = ETCA_INS_ALLOC_ZERO; - ctx->abm.present = true; - ctx->abm.a = code[1] >> 5; + ctx->single_reg.present = true; + ctx->single_reg.a = code[1] >> 5; code += 2; code_len -= 2; @@ -298,8 +303,8 @@ static bool parseCoreOp(DecodeIsntCtx *ctx, const uint8_t **code_p, } else if (code_len >= 2 && code[0] >> 6 == 0 && (code[0] & 0xF) == 0xF && (code[1] << 3) >> 3 == 4) { ctx->insn = ETCA_INS_DCACHE_INVALIDATE; - ctx->abm.present = true; - ctx->abm.a = code[1] >> 5; + ctx->single_reg.present = true; + ctx->single_reg.a = code[1] >> 5; code += 2; code_len -= 2; @@ -355,8 +360,8 @@ static bool parseCoreOp(DecodeIsntCtx *ctx, const uint8_t **code_p, } // clang-format on - ctx->abm.present = true; - ctx->abm.a = code[1] >> 5; + ctx->single_reg.present = true; 
+ ctx->single_reg.a = code[1] >> 5; code += 2; code_len -= 2; @@ -372,8 +377,8 @@ static bool parseCoreOp(DecodeIsntCtx *ctx, const uint8_t **code_p, ctx->cond = code[1] & 0xF; - ctx->abm.present = true; - ctx->abm.a = code[1] >> 5; + ctx->single_reg.present = true; + ctx->single_reg.a = code[1] >> 5; code += 2; code_len -= 2; @@ -588,6 +593,15 @@ bool Etca_getInstruction(csh ud, const uint8_t *code, size_t code_len, // TODO: mo1 & mo2 if (ctx.abm.m != 0) return false; + } else if (ctx.single_reg.present) { + info->op.op_count = 1; + + info->op.operands[0].type = ETCA_OP_REG; + info->op.operands[0].reg = + ((ctx.pfx_rex.present && ctx.pfx_rex.a) ? + ETCA_REG_FIRST_REX : + ETCA_REG_FIRST_BASE) + + ctx.single_reg.a; } // TODO: add_group diff --git a/arch/Etca/EtcaInstPrinter.c b/arch/Etca/EtcaInstPrinter.c index 483d76b463..61cbc60aa8 100644 --- a/arch/Etca/EtcaInstPrinter.c +++ b/arch/Etca/EtcaInstPrinter.c @@ -82,9 +82,9 @@ const char *Etca_insn_name(csh handle, unsigned int id) case ETCA_INS_ABS_JMP: return "abs_jmp"; case ETCA_INS_REL_CALL: - return "rel_call"; + return "call"; case ETCA_INS_ABS_CALL: - return "abs_call"; + return "call"; case ETCA_INS_ADD: return "add"; @@ -444,12 +444,26 @@ void Etca_printInst(MCInst *MI, SStream *O, void *infoIn) #ifndef CAPSTONE_DIET etca_info *info = (etca_info *)infoIn; - // first word in buffer has to be mnemonic because of SStream_extract_mnem_opstr - SStream_concat0(O, Etca_insn_name(0, info->op.insn)); - - if (info->op.cond != ETCA_COND_ALWAYS) { - SStream_concat(O, " when %s, ", - cs_etca_cond_name(info->op.cond)); + // first word in buffer has to be mnemonic because of SStream_extract_mnem_opstr!! 
+ + if ((info->op.insn == ETCA_INS_ABS_JMP || + info->op.insn == ETCA_INS_REL_JMP) && + info->op.cond != ETCA_COND_ALWAYS) { + SStream_concat1(O, 'j'); + SStream_concat0(O, cs_etca_cond_name(info->op.cond)); + } else { + SStream_concat0(O, Etca_insn_name(0, info->op.insn)); + if (info->op.cond != ETCA_COND_ALWAYS) { + if (info->op.insn == ETCA_INS_ABS_CALL || + info->op.insn == ETCA_INS_REL_CALL) { + SStream_concat0( + O, cs_etca_cond_name(info->op.cond)); + } else { + SStream_concat( + O, " when %s, ", + cs_etca_cond_name(info->op.cond)); + } + } } if (isSizedInsn(info->op.insn)) { From e058ad6dd6b856842888f9883910c0499eaadf32 Mon Sep 17 00:00:00 2001 From: Alexander Nutz Date: Fri, 31 Oct 2025 21:50:01 +0100 Subject: [PATCH 3/8] improve printing of jmp & call ops again --- arch/Etca/EtcaInstPrinter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/Etca/EtcaInstPrinter.c b/arch/Etca/EtcaInstPrinter.c index 61cbc60aa8..7826255fea 100644 --- a/arch/Etca/EtcaInstPrinter.c +++ b/arch/Etca/EtcaInstPrinter.c @@ -78,9 +78,9 @@ const char *Etca_insn_name(csh handle, unsigned int id) return "nop"; case ETCA_INS_REL_JMP: - return "rel_jmp"; + return "jmp"; case ETCA_INS_ABS_JMP: - return "abs_jmp"; + return "jmp"; case ETCA_INS_REL_CALL: return "call"; case ETCA_INS_ABS_CALL: From 74a1652ee59e3c161e845c30aff6b853a744702d Mon Sep 17 00:00:00 2001 From: Alexander Nutz Date: Sat, 1 Nov 2025 15:29:34 +0100 Subject: [PATCH 4/8] wip mo2 & other changes --- arch/Etca/EtcaDisassembler.c | 229 +++++++++++++++++++++++++++++++++-- arch/Etca/EtcaInstPrinter.c | 2 +- cstool/cstool.c | 4 +- include/capstone/capstone.h | 4 +- include/capstone/etca.h | 2 +- 5 files changed, 229 insertions(+), 12 deletions(-) diff --git a/arch/Etca/EtcaDisassembler.c b/arch/Etca/EtcaDisassembler.c index 38d023fdd3..c5ad25f51d 100644 --- a/arch/Etca/EtcaDisassembler.c +++ b/arch/Etca/EtcaDisassembler.c @@ -13,7 +13,6 @@ #include "../../utils.h" #include "EtcaDisassembler.h" 
#include "capstone/sh.h" -#include /* static void add_group(cs_detail *detail, cs_etca_insn_group group) @@ -474,6 +473,214 @@ static bool parseCoreOp(DecodeIsntCtx *ctx, const uint8_t **code_p, return true; } +typedef struct { + uint8_t scale : 2; + uint8_t index : 3; + uint8_t base : 3; +} sib_byte; + +static sib_byte parseSib(uint8_t b) +{ + return (sib_byte){ b >> 6, (b >> 3) & 3, b & 3 }; +} + +static uint64_t parseMultiByteUInt(uint8_t const *code, size_t nb) +{ + uint64_t imm = 0; + for (size_t i = 0; i < nb; i++) { + imm <<= 8; + imm |= code[i]; + } + return imm; +} + +static bool parseM(etca_info *info, size_t ptrWidthB, DecodeIsntCtx *ctx, + const uint8_t **code_p, size_t *code_len_p, uint16_t *size) +{ + const uint8_t *code = *code_p; + size_t code_len = *code_len_p; + + if (ctx->abm.m == 0) { + /* base */ + } else if (ctx->abm.m == 1 && ctx->abm.b == 0 && ctx->abm.a != 0 && + ctx->abm.a != 4) { + /* from mo2 */ + + if (!code_len) + return false; + sib_byte sib = parseSib(code[0]); + + if (ctx->abm.a == 1) { + /* sib, dP, i8 || [dP], i8 */ + + size_t dPWidth = ptrWidthB; + if (dPWidth == 8 && + !(ctx->pfx_rex.present && ctx->pfx_rex.q)) + dPWidth = 4; + + if (!(code_len >= dPWidth + 2)) + return false; + + uint64_t dP = parseMultiByteUInt(&code[1], dPWidth); + uint8_t i8 = code[1 + dPWidth]; + + cs_etca_op_mem memop = { 0 }; + memop.displacement = dP; + + info->op.operands[0].type = ETCA_OP_MEM; + info->op.operands[0].mem = memop; + + info->op.operands[1].type = ETCA_OP_IMM; + info->op.operands[1].imm = i8; + + code += 2 + ptrWidthB; + code_len -= 2 + ptrWidthB; + (*size) += 2 + ptrWidthB; + } else if (ctx->abm.a == 2) { + /* sib, i8 || [sib.b], i8 */ + + if (!(code_len >= 2)) + return false; + + uint8_t i8 = code[1]; + + cs_etca_op_mem memop = { 0 }; + memop.base.enabled = true; + memop.base.base = sib.base; + + info->op.operands[0].type = ETCA_OP_MEM; + info->op.operands[0].mem = memop; + + info->op.operands[1].type = ETCA_OP_IMM; + 
info->op.operands[1].imm = i8; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (ctx->abm.a == 3) { + /* sib, dP, i8 || [sib.b + dP], i8 */ + + size_t dPWidth = ptrWidthB; + if (dPWidth == 8 && + !(ctx->pfx_rex.present && ctx->pfx_rex.q)) + dPWidth = 4; + + if (!(code_len >= dPWidth + 2)) + return false; + + uint64_t dP = parseMultiByteUInt(&code[1], dPWidth); + uint8_t i8 = code[1 + dPWidth]; + + cs_etca_op_mem memop = { 0 }; + memop.base.enabled = true; + memop.base.base = sib.base; + memop.displacement = dP; + + info->op.operands[0].type = ETCA_OP_MEM; + info->op.operands[0].mem = memop; + + info->op.operands[1].type = ETCA_OP_IMM; + info->op.operands[1].imm = i8; + + code += 2 + ptrWidthB; + code_len -= 2 + ptrWidthB; + (*size) += 2 + ptrWidthB; + } else if (ctx->abm.a == 5) { + /* sib, dP, i8 || [2^sib.s*sib.x + dP], i8 */ + + size_t dPWidth = ptrWidthB; + if (dPWidth == 8 && + !(ctx->pfx_rex.present && ctx->pfx_rex.q)) + dPWidth = 4; + + if (!(code_len >= dPWidth + 2)) + return false; + + uint64_t dP = parseMultiByteUInt(&code[1], dPWidth); + uint8_t i8 = code[1 + dPWidth]; + + cs_etca_op_mem memop = { 0 }; + memop.index.enabled = true; + memop.index.index = sib.index; + memop.index.index_multiplier_log2 = sib.scale; + memop.displacement = dP; + + info->op.operands[0].type = ETCA_OP_MEM; + info->op.operands[0].mem = memop; + + info->op.operands[1].type = ETCA_OP_IMM; + info->op.operands[1].imm = i8; + + code += 2 + ptrWidthB; + code_len -= 2 + ptrWidthB; + (*size) += 2 + ptrWidthB; + } else if (ctx->abm.a == 6) { + /* sib, i8 || [2^sib.s*sib.x + sib.b], i8 */ + + if (!(code_len >= 2)) + return false; + + uint8_t i8 = code[1]; + + cs_etca_op_mem memop = { 0 }; + memop.index.enabled = true; + memop.index.index = sib.index; + memop.index.index_multiplier_log2 = sib.scale; + memop.base.enabled = true; + memop.base.base = sib.base; + + info->op.operands[0].type = ETCA_OP_MEM; + info->op.operands[0].mem = memop; + + info->op.operands[1].type = 
ETCA_OP_IMM; + info->op.operands[1].imm = i8; + + code += 2 + ptrWidthB; + code_len -= 2 + ptrWidthB; + (*size) += 2 + ptrWidthB; + } else if (ctx->abm.a == 7) { + /* sib, dP, i8 || [2^sib.s*sib.x + sib.b + dP], i8 */ + + size_t dPWidth = ptrWidthB; + if (dPWidth == 8 && + !(ctx->pfx_rex.present && ctx->pfx_rex.q)) + dPWidth = 4; + + if (!(code_len >= dPWidth + 2)) + return false; + + uint64_t dP = parseMultiByteUInt(&code[1], dPWidth); + uint8_t i8 = code[1 + dPWidth]; + + cs_etca_op_mem memop = { 0 }; + memop.index.enabled = true; + memop.index.index = sib.index; + memop.index.index_multiplier_log2 = sib.scale; + memop.base.enabled = true; + memop.base.base = sib.base; + memop.displacement = dP; + + info->op.operands[0].type = ETCA_OP_MEM; + info->op.operands[0].mem = memop; + + info->op.operands[1].type = ETCA_OP_IMM; + info->op.operands[1].imm = i8; + + code += 2 + ptrWidthB; + code_len -= 2 + ptrWidthB; + (*size) += 2 + ptrWidthB; + } + } + // TODO: finish mo2; mo1 + else + return false; + + *code_len_p = code_len; + *code_p = code; + + return true; +} + // returns true if valid bool Etca_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *mcInstr, uint16_t * /* out */ size, @@ -482,6 +689,17 @@ bool Etca_getInstruction(csh ud, const uint8_t *code, size_t code_len, etca_info *info = infoIn; // cs_detail *detail = mcInstr->flat_insn->detail; + size_t ptrWidthLog2; + // clang-format off + switch (mcInstr->csh->mode) { + case CS_MODE_ETCA16: ptrWidthLog2 = 1; break; + case CS_MODE_ETCA32: ptrWidthLog2 = 2; break; + case CS_MODE_ETCA64: ptrWidthLog2 = 3; break; + default: ptrWidthLog2 = 1; break; + } + // clang-format on + size_t ptrWidthB = 1 << ptrWidthLog2; + DecodeIsntCtx ctx = { 0 }; ctx.insn = ETCA_INS_INVALID; ctx.cond = ETCA_COND_ALWAYS; @@ -539,11 +757,7 @@ bool Etca_getInstruction(csh ud, const uint8_t *code, size_t code_len, ctx.abm.present = false; ctx.ri.present = true; ctx.ri.r = ctx.abm.a; - ctx.ri.imm = 0; - for (size_t i = 0; i < 
sz; i++) { - ctx.ri.imm <<= 8; - ctx.ri.imm |= code[i]; - } + ctx.ri.imm = parseMultiByteUInt(code, sz); code += sz; code_len -= sz; @@ -590,8 +804,7 @@ bool Etca_getInstruction(csh ud, const uint8_t *code, size_t code_len, ETCA_REG_FIRST_BASE) + ctx.abm.b; - // TODO: mo1 & mo2 - if (ctx.abm.m != 0) + if (!parseM(info, ptrWidthB, &ctx, &code, &code_len, size)) return false; } else if (ctx.single_reg.present) { info->op.op_count = 1; diff --git a/arch/Etca/EtcaInstPrinter.c b/arch/Etca/EtcaInstPrinter.c index 7826255fea..dd19b0bb1a 100644 --- a/arch/Etca/EtcaInstPrinter.c +++ b/arch/Etca/EtcaInstPrinter.c @@ -327,7 +327,7 @@ static void printMemOp(SStream *O, cs_etca_op_mem *op) } first = false; - printInt32(O, op->displacement); + printInt64(O, op->displacement); } SStream_concat1(O, ']'); diff --git a/cstool/cstool.c b/cstool/cstool.c index 2cee740c8d..32e11647fd 100644 --- a/cstool/cstool.c +++ b/cstool/cstool.c @@ -339,7 +339,9 @@ static struct { { "evm", "ethereum virtual machine", CS_ARCH_EVM, 0 }, - { "etca", "ETC.a", CS_ARCH_ETCA, 0 }, + { "etca16", "ETC.a, 16 bit pointers", CS_ARCH_ETCA, CS_MODE_16 }, + { "etca32", "ETC.a, 32 bit pointers", CS_ARCH_ETCA, CS_MODE_32 }, + { "etca64", "ETC.a, 64 bit pointers", CS_ARCH_ETCA, CS_MODE_64 }, { "wasm", "web assembly", CS_ARCH_WASM, 0 }, diff --git a/include/capstone/capstone.h b/include/capstone/capstone.h index 770730cc78..b26511e3cd 100644 --- a/include/capstone/capstone.h +++ b/include/capstone/capstone.h @@ -227,7 +227,9 @@ typedef enum cs_mode { CS_MODE_SH4A = 1 << 5, ///< SH4A CS_MODE_SHFPU = 1 << 6, ///< w/ FPU CS_MODE_SHDSP = 1 << 7, ///< w/ DSP - CS_MODE_ETCA = 1 << 1, + CS_MODE_ETCA16 = CS_MODE_16, + CS_MODE_ETCA32 = CS_MODE_32, + CS_MODE_ETCA64 = CS_MODE_64, CS_MODE_TRICORE_110 = 1 << 1, ///< Tricore 1.1 CS_MODE_TRICORE_120 = 1 << 2, ///< Tricore 1.2 CS_MODE_TRICORE_130 = 1 << 3, ///< Tricore 1.3 diff --git a/include/capstone/etca.h b/include/capstone/etca.h index ae85c32e85..a0077fa4bb 100644 --- 
a/include/capstone/etca.h +++ b/include/capstone/etca.h @@ -67,7 +67,7 @@ typedef struct { cs_etca_reg base; } base; - int32_t displacement; + int64_t displacement; } cs_etca_op_mem; // Instruction operand From 0f2f49b864dbcc2be116d9cea01e3f3a2d31b49c Mon Sep 17 00:00:00 2001 From: Alexander Nutz Date: Sat, 1 Nov 2025 15:34:21 +0100 Subject: [PATCH 5/8] c --- arch/Etca/EtcaDisassembler.c | 26 +++++--------------------- arch/Etca/EtcaModule.c | 2 +- include/capstone/etca.h | 4 ---- 3 files changed, 6 insertions(+), 26 deletions(-) diff --git a/arch/Etca/EtcaDisassembler.c b/arch/Etca/EtcaDisassembler.c index c5ad25f51d..be195bdd53 100644 --- a/arch/Etca/EtcaDisassembler.c +++ b/arch/Etca/EtcaDisassembler.c @@ -9,24 +9,8 @@ #include #include "../../cs_priv.h" #include "../../MCInst.h" -#include "../../MCDisassembler.h" -#include "../../utils.h" +#include "../../MathExtras.h" #include "EtcaDisassembler.h" -#include "capstone/sh.h" - -/* -static void add_group(cs_detail *detail, cs_etca_insn_group group) -{ - if (detail != NULL && group > ETCA_GRP_INVALID && - group < ETCA_GRP_ENDING) - detail->groups[detail->groups_count++] = group; -} -*/ - -#define sign_extend(var, type, width) \ - if ((var) & (1 << ((width) - 1))) { \ - var = (((type) - 1) << (width)) | var; \ - } typedef struct { struct { @@ -174,7 +158,7 @@ static void parseRI(DecodeIsntCtx *ctx, uint8_t byte, etca_insn insn) ctx->ri.r = byte >> 5; ctx->ri.imm = byte & 31 /* 0b11111 */; if (doesSignExtend(insn)) - sign_extend(ctx->ri.imm, uint64_t, 5); + ctx->ri.imm = SignExtend64(ctx->ri.imm, 5); } static etca_insn parseExopOpcode(uint16_t opc) @@ -386,7 +370,7 @@ static bool parseCoreOp(DecodeIsntCtx *ctx, const uint8_t **code_p, ctx->insn = ETCA_INS_REL_CALL; uint64_t d = (code[0] & 0xF) << 8 | code[1]; - sign_extend(d, uint64_t, 12); + d = SignExtend64(d, 12); ctx->rel.present = true; ctx->rel.extended = d; @@ -402,7 +386,7 @@ static bool parseCoreOp(DecodeIsntCtx *ctx, const uint8_t **code_p, ctx->cond = 
code[0] & 0xF; uint64_t d = ((code[0] >> 4) & 1) << 8 | code[1]; - sign_extend(d, uint64_t, 9); + d = SignExtend64(d, 9); ctx->rel.present = true; ctx->rel.extended = d; @@ -433,7 +417,7 @@ static bool parseCoreOp(DecodeIsntCtx *ctx, const uint8_t **code_p, if (ctx->insn == ETCA_INS_REL_JMP || ctx->insn == ETCA_INS_REL_CALL) - sign_extend(d, uint64_t, sz * 8); + d = SignExtend64(d, sz * 8); code += sz + 1; code_len -= sz + 1; diff --git a/arch/Etca/EtcaModule.c b/arch/Etca/EtcaModule.c index 3f25468e1a..84c3b30305 100644 --- a/arch/Etca/EtcaModule.c +++ b/arch/Etca/EtcaModule.c @@ -12,7 +12,7 @@ cs_err Etca_global_init(cs_struct *ud) { etca_info *info; - info = cs_mem_malloc(sizeof(etca_info)); + info = cs_mem_calloc(1, sizeof(etca_info)); if (!info) { return CS_ERR_MEM; } diff --git a/include/capstone/etca.h b/include/capstone/etca.h index a0077fa4bb..95df3e8e8a 100644 --- a/include/capstone/etca.h +++ b/include/capstone/etca.h @@ -11,10 +11,6 @@ extern "C" { #include "platform.h" #include "cs_operand.h" -#ifdef _MSC_VER -#pragma warning(disable : 4201) -#endif - /// ETCA registers and special registers typedef enum { ETCA_REG_INVALID = 0, From 2b0a93e7408de184cf88afba57ff8b7fd900dcdc Mon Sep 17 00:00:00 2001 From: Alexander Nutz Date: Sat, 1 Nov 2025 15:41:53 +0100 Subject: [PATCH 6/8] clean up inst printer a bit --- arch/Etca/EtcaInstPrinter.c | 19 +++++++++++-------- include/capstone/etca.h | 5 +++++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/Etca/EtcaInstPrinter.c b/arch/Etca/EtcaInstPrinter.c index dd19b0bb1a..9c12823b71 100644 --- a/arch/Etca/EtcaInstPrinter.c +++ b/arch/Etca/EtcaInstPrinter.c @@ -301,7 +301,7 @@ static void printReg(SStream *O, cs_etca_reg reg) #endif #ifndef CAPSTONE_DIET -static void printMemOp(SStream *O, cs_etca_op_mem *op) +static void printMemOp(SStream *O, const cs_etca_op_mem *op) { bool first = true; SStream_concat1(O, '['); @@ -335,7 +335,7 @@ static void printMemOp(SStream *O, cs_etca_op_mem *op) #endif 
#ifndef CAPSTONE_DIET -static void printOp(SStream *O, cs_etca_op *op, etca_insn insn) +static void printOp(SStream *O, const cs_etca_op *op, etca_insn insn) { switch (op->type) { case ETCA_OP_INVALID: @@ -439,6 +439,11 @@ static bool isSizedInsn(etca_insn insn) } #endif +static inline bool op_is_reg(const cs_etca_op *op, cs_etca_reg reg) +{ + return op->type == ETCA_OP_REG && op->reg == reg; +} + void Etca_printInst(MCInst *MI, SStream *O, void *infoIn) { #ifndef CAPSTONE_DIET @@ -473,14 +478,14 @@ void Etca_printInst(MCInst *MI, SStream *O, void *infoIn) int numPrinted = 0; for (int i = 0; i < info->op.op_count; i++) { + const cs_etca_op *op = &info->op.operands[i]; + /* don't print sp reg if it's the default */ if (i == 1 && info->op.insn == ETCA_INS_POP && - info->op.operands[1].type == ETCA_OP_REG && - info->op.operands[1].reg == ETCA_REG_R6) + op_is_reg(op, ETCA_REG_R6)) continue; if (i == 0 && info->op.insn == ETCA_INS_PUSH && - info->op.operands[0].type == ETCA_OP_REG && - info->op.operands[0].reg == ETCA_REG_R6) + op_is_reg(op, ETCA_REG_R6)) continue; if (numPrinted != 0) { @@ -488,8 +493,6 @@ void Etca_printInst(MCInst *MI, SStream *O, void *infoIn) } SStream_concat0(O, " "); - cs_etca_op *op = &info->op.operands[i]; - char const *crname; if (i == 1 && (info->op.insn == ETCA_INS_READCR || diff --git a/include/capstone/etca.h b/include/capstone/etca.h index 95df3e8e8a..bb2202fc5c 100644 --- a/include/capstone/etca.h +++ b/include/capstone/etca.h @@ -11,6 +11,11 @@ extern "C" { #include "platform.h" #include "cs_operand.h" +#ifdef _MSC_VER +// "anonymous unions are a non-standard extension" +#pragma warning(disable : 4201) +#endif + /// ETCA registers and special registers typedef enum { ETCA_REG_INVALID = 0, From 53db5dd4ab7bdd1a2f9a307233a2e8525bca6384 Mon Sep 17 00:00:00 2001 From: Alexander Nutz Date: Sat, 1 Nov 2025 15:42:50 +0100 Subject: [PATCH 7/8] remove etca from makefile --- Makefile | 10 ---------- 1 file changed, 10 deletions(-) diff --git 
a/Makefile b/Makefile index 9e0c16cda0..59f6800edf 100644 --- a/Makefile +++ b/Makefile @@ -273,16 +273,6 @@ ifneq (,$(findstring evm,$(CAPSTONE_ARCHS))) LIBOBJ_EVM += $(LIBSRC_EVM:%.c=$(OBJDIR)/%.o) endif -DEP_ETCA = -DEP_ETCA += $(wildcard arch/Etca/Etca*.inc) - -LIBOBJ_ETCA = -ifneq (,$(findstring etca,$(CAPSTONE_ARCHS))) - CFLAGS += -DCAPSTONE_HAS_ETCA - LIBSRC_ETCA += $(wildcard arch/Etca/Etca*.c) - LIBOBJ_ETCA += $(LIBSRC_ETCA:%.c=$(OBJDIR)/%.o) -endif - DEP_RISCV = DEP_RISCV += $(wildcard arch/RISCV/RISCV*.inc) From 750c8c666d2c1e9070f646f30a9c0161d421276b Mon Sep 17 00:00:00 2001 From: Alexander Nutz Date: Sat, 1 Nov 2025 16:25:40 +0100 Subject: [PATCH 8/8] reformat with clang 17 --- arch/Etca/EtcaDisassembler.c | 2 +- arch/Etca/EtcaInstPrinter.c | 6 +- cs.c | 196 +++++++++++++++-------------------- 3 files changed, 90 insertions(+), 114 deletions(-) diff --git a/arch/Etca/EtcaDisassembler.c b/arch/Etca/EtcaDisassembler.c index be195bdd53..1df9127404 100644 --- a/arch/Etca/EtcaDisassembler.c +++ b/arch/Etca/EtcaDisassembler.c @@ -468,7 +468,7 @@ static sib_byte parseSib(uint8_t b) return (sib_byte){ b >> 6, (b >> 3) & 3, b & 3 }; } -static uint64_t parseMultiByteUInt(uint8_t const *code, size_t nb) +static uint64_t parseMultiByteUInt(const uint8_t *code, size_t nb) { uint64_t imm = 0; for (size_t i = 0; i < nb; i++) { diff --git a/arch/Etca/EtcaInstPrinter.c b/arch/Etca/EtcaInstPrinter.c index 9c12823b71..d062d704b4 100644 --- a/arch/Etca/EtcaInstPrinter.c +++ b/arch/Etca/EtcaInstPrinter.c @@ -207,7 +207,7 @@ const char *Etca_insn_name(csh handle, unsigned int id) #endif } -char const *cs_etca_cond_name(uint8_t cond) +const char *cs_etca_cond_name(uint8_t cond) { switch (cond) { case ETCA_COND_Z: @@ -249,7 +249,7 @@ char const *cs_etca_cond_name(uint8_t cond) } } -char const *cs_etca_cr_name(cs_etca_cr cr) +const char *cs_etca_cr_name(cs_etca_cr cr) { switch (cr) { case ETCA_CR_CPUID1: @@ -493,7 +493,7 @@ void Etca_printInst(MCInst *MI, SStream *O, void 
*infoIn) } SStream_concat0(O, " "); - char const *crname; + const char *crname; if (i == 1 && (info->op.insn == ETCA_INS_READCR || info->op.insn == ETCA_INS_WRITECR) && diff --git a/cs.c b/cs.c index 9bd285da35..b8791e5d2f 100644 --- a/cs.c +++ b/cs.c @@ -92,183 +92,159 @@ typedef struct cs_arch_config { #define CS_ARCH_CONFIG_ARM \ { \ - ARM_global_init, \ - ARM_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_ARM | CS_MODE_V8 | \ - CS_MODE_MCLASS | CS_MODE_THUMB | CS_MODE_BIG_ENDIAN), \ + ARM_global_init, ARM_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_ARM | CS_MODE_V8 | \ + CS_MODE_MCLASS | CS_MODE_THUMB | \ + CS_MODE_BIG_ENDIAN), \ } #define CS_ARCH_CONFIG_AARCH64 \ { \ - AArch64_global_init, \ - AArch64_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_ARM | CS_MODE_BIG_ENDIAN | \ - CS_MODE_APPLE_PROPRIETARY), \ + AArch64_global_init, AArch64_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_ARM | \ + CS_MODE_BIG_ENDIAN | CS_MODE_APPLE_PROPRIETARY), \ } #define CS_ARCH_CONFIG_MIPS \ { \ - Mips_global_init, \ - Mips_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BIG_ENDIAN | \ - CS_MODE_MIPS16 | CS_MODE_MIPS32 | CS_MODE_MIPS64 | \ - CS_MODE_MICRO | CS_MODE_MIPS1 | CS_MODE_MIPS2 | \ - CS_MODE_MIPS32R2 | CS_MODE_MIPS32R3 | CS_MODE_MIPS32R5 | \ - CS_MODE_MIPS32R6 | CS_MODE_MIPS3 | CS_MODE_MIPS4 | \ - CS_MODE_MIPS5 | CS_MODE_MIPS64R2 | CS_MODE_MIPS64R3 | \ - CS_MODE_MIPS64R5 | CS_MODE_MIPS64R6 | CS_MODE_OCTEON | \ - CS_MODE_OCTEONP | CS_MODE_NANOMIPS | CS_MODE_NMS1 | \ - CS_MODE_I7200 | CS_MODE_MIPS_NOFLOAT | CS_MODE_MIPS_PTR64), \ + Mips_global_init, Mips_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BIG_ENDIAN | \ + CS_MODE_MIPS16 | CS_MODE_MIPS32 | CS_MODE_MIPS64 | \ + CS_MODE_MICRO | CS_MODE_MIPS1 | CS_MODE_MIPS2 | \ + CS_MODE_MIPS32R2 | CS_MODE_MIPS32R3 | \ + CS_MODE_MIPS32R5 | CS_MODE_MIPS32R6 | \ + CS_MODE_MIPS3 | CS_MODE_MIPS4 | CS_MODE_MIPS5 | \ + CS_MODE_MIPS64R2 | CS_MODE_MIPS64R3 | \ + CS_MODE_MIPS64R5 | CS_MODE_MIPS64R6 | \ + CS_MODE_OCTEON | CS_MODE_OCTEONP | \ 
+ CS_MODE_NANOMIPS | CS_MODE_NMS1 | CS_MODE_I7200 | \ + CS_MODE_MIPS_NOFLOAT | CS_MODE_MIPS_PTR64), \ } #define CS_ARCH_CONFIG_X86 \ { \ - X86_global_init, \ - X86_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_32 | CS_MODE_64 | \ - CS_MODE_16), \ + X86_global_init, X86_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_32 | CS_MODE_64 | \ + CS_MODE_16), \ } #define CS_ARCH_CONFIG_PPC \ { \ - PPC_global_init, \ - PPC_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_32 | CS_MODE_64 | \ - CS_MODE_BIG_ENDIAN | CS_MODE_QPX | CS_MODE_PS | \ - CS_MODE_BOOKE | CS_MODE_SPE | CS_MODE_AIX_OS | \ - CS_MODE_PWR7 | CS_MODE_PWR8 | CS_MODE_PWR9 | CS_MODE_PWR10 | \ - CS_MODE_PPC_ISA_FUTURE | CS_MODE_MSYNC | \ - CS_MODE_MODERN_AIX_AS), \ + PPC_global_init, PPC_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_32 | CS_MODE_64 | \ + CS_MODE_BIG_ENDIAN | CS_MODE_QPX | CS_MODE_PS | \ + CS_MODE_BOOKE | CS_MODE_SPE | CS_MODE_AIX_OS | \ + CS_MODE_PWR7 | CS_MODE_PWR8 | CS_MODE_PWR9 | \ + CS_MODE_PWR10 | CS_MODE_PPC_ISA_FUTURE | \ + CS_MODE_MSYNC | CS_MODE_MODERN_AIX_AS), \ } #define CS_ARCH_CONFIG_SPARC \ { \ - Sparc_global_init, \ - Sparc_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BIG_ENDIAN | CS_MODE_V9 | \ - CS_MODE_64 | CS_MODE_32), \ + Sparc_global_init, Sparc_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BIG_ENDIAN | \ + CS_MODE_V9 | CS_MODE_64 | CS_MODE_32), \ } #define CS_ARCH_CONFIG_SYSTEMZ \ { \ - SystemZ_global_init, \ - SystemZ_option, \ - ~(CS_MODE_BIG_ENDIAN | CS_MODE_SYSTEMZ_ARCH8 | \ - CS_MODE_SYSTEMZ_ARCH9 | CS_MODE_SYSTEMZ_ARCH10 | \ - CS_MODE_SYSTEMZ_ARCH11 | CS_MODE_SYSTEMZ_ARCH12 | \ - CS_MODE_SYSTEMZ_ARCH13 | CS_MODE_SYSTEMZ_ARCH14 | \ - CS_MODE_SYSTEMZ_Z10 | CS_MODE_SYSTEMZ_Z196 | \ - CS_MODE_SYSTEMZ_ZEC12 | CS_MODE_SYSTEMZ_Z13 | \ - CS_MODE_SYSTEMZ_Z14 | CS_MODE_SYSTEMZ_Z15 | \ - CS_MODE_SYSTEMZ_Z16 | CS_MODE_SYSTEMZ_GENERIC), \ + SystemZ_global_init, SystemZ_option, \ + ~(CS_MODE_BIG_ENDIAN | CS_MODE_SYSTEMZ_ARCH8 | \ + CS_MODE_SYSTEMZ_ARCH9 | CS_MODE_SYSTEMZ_ARCH10 | \ + 
CS_MODE_SYSTEMZ_ARCH11 | CS_MODE_SYSTEMZ_ARCH12 | \ + CS_MODE_SYSTEMZ_ARCH13 | CS_MODE_SYSTEMZ_ARCH14 | \ + CS_MODE_SYSTEMZ_Z10 | CS_MODE_SYSTEMZ_Z196 | \ + CS_MODE_SYSTEMZ_ZEC12 | CS_MODE_SYSTEMZ_Z13 | \ + CS_MODE_SYSTEMZ_Z14 | CS_MODE_SYSTEMZ_Z15 | \ + CS_MODE_SYSTEMZ_Z16 | CS_MODE_SYSTEMZ_GENERIC), \ } #define CS_ARCH_CONFIG_XCORE \ { \ - XCore_global_init, \ - XCore_option, \ - ~(CS_MODE_BIG_ENDIAN), \ + XCore_global_init, XCore_option, ~(CS_MODE_BIG_ENDIAN), \ } #define CS_ARCH_CONFIG_M68K \ { \ - M68K_global_init, \ - M68K_option, \ - ~(CS_MODE_BIG_ENDIAN | CS_MODE_M68K_000 | CS_MODE_M68K_010 | \ - CS_MODE_M68K_020 | CS_MODE_M68K_030 | CS_MODE_M68K_040 | \ - CS_MODE_M68K_060), \ + M68K_global_init, M68K_option, \ + ~(CS_MODE_BIG_ENDIAN | CS_MODE_M68K_000 | \ + CS_MODE_M68K_010 | CS_MODE_M68K_020 | \ + CS_MODE_M68K_030 | CS_MODE_M68K_040 | \ + CS_MODE_M68K_060), \ } #define CS_ARCH_CONFIG_TMS320C64X \ { \ - TMS320C64x_global_init, \ - TMS320C64x_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BIG_ENDIAN), \ + TMS320C64x_global_init, TMS320C64x_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BIG_ENDIAN), \ } #define CS_ARCH_CONFIG_M680X \ { \ - M680X_global_init, \ - M680X_option, \ - ~(CS_MODE_M680X_6301 | CS_MODE_M680X_6309 | \ - CS_MODE_M680X_6800 | CS_MODE_M680X_6801 | \ - CS_MODE_M680X_6805 | CS_MODE_M680X_6808 | \ - CS_MODE_M680X_6809 | CS_MODE_M680X_6811 | \ - CS_MODE_M680X_CPU12 | CS_MODE_M680X_HCS08), \ + M680X_global_init, M680X_option, \ + ~(CS_MODE_M680X_6301 | CS_MODE_M680X_6309 | \ + CS_MODE_M680X_6800 | CS_MODE_M680X_6801 | \ + CS_MODE_M680X_6805 | CS_MODE_M680X_6808 | \ + CS_MODE_M680X_6809 | CS_MODE_M680X_6811 | \ + CS_MODE_M680X_CPU12 | CS_MODE_M680X_HCS08), \ } #define CS_ARCH_CONFIG_EVM \ { \ - EVM_global_init, \ - EVM_option, \ - 0, \ + EVM_global_init, EVM_option, 0, \ } #define CS_ARCH_CONFIG_MOS65XX \ { \ - MOS65XX_global_init, \ - MOS65XX_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_MOS65XX_6502 | \ - CS_MODE_MOS65XX_65C02 | 
CS_MODE_MOS65XX_W65C02 | \ - CS_MODE_MOS65XX_65816_LONG_MX), \ + MOS65XX_global_init, MOS65XX_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_MOS65XX_6502 | \ + CS_MODE_MOS65XX_65C02 | CS_MODE_MOS65XX_W65C02 | \ + CS_MODE_MOS65XX_65816_LONG_MX), \ } #define CS_ARCH_CONFIG_WASM \ { \ - WASM_global_init, \ - WASM_option, \ - 0, \ + WASM_global_init, WASM_option, 0, \ } #define CS_ARCH_CONFIG_BPF \ { \ - BPF_global_init, \ - BPF_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BPF_CLASSIC | \ - CS_MODE_BPF_EXTENDED | CS_MODE_BIG_ENDIAN), \ + BPF_global_init, BPF_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BPF_CLASSIC | \ + CS_MODE_BPF_EXTENDED | CS_MODE_BIG_ENDIAN), \ } #define CS_ARCH_CONFIG_RISCV \ { \ - RISCV_global_init, \ - RISCV_option, \ - ~(CS_MODE_RISCV32 | CS_MODE_RISCV64 | CS_MODE_RISCVC), \ + RISCV_global_init, RISCV_option, \ + ~(CS_MODE_RISCV32 | CS_MODE_RISCV64 | CS_MODE_RISCVC), \ } #define CS_ARCH_CONFIG_SH \ { \ - SH_global_init, \ - SH_option, \ - ~(CS_MODE_SH2 | CS_MODE_SH2A | CS_MODE_SH3 | CS_MODE_SH4 | \ - CS_MODE_SH4A | CS_MODE_SHFPU | CS_MODE_SHDSP | \ - CS_MODE_BIG_ENDIAN), \ + SH_global_init, SH_option, \ + ~(CS_MODE_SH2 | CS_MODE_SH2A | CS_MODE_SH3 | \ + CS_MODE_SH4 | CS_MODE_SH4A | CS_MODE_SHFPU | \ + CS_MODE_SHDSP | CS_MODE_BIG_ENDIAN), \ } #define CS_ARCH_CONFIG_TRICORE \ { \ - TRICORE_global_init, \ - TRICORE_option, \ - ~(CS_MODE_TRICORE_110 | CS_MODE_TRICORE_120 | \ - CS_MODE_TRICORE_130 | CS_MODE_TRICORE_131 | \ - CS_MODE_TRICORE_160 | CS_MODE_TRICORE_161 | \ - CS_MODE_TRICORE_162 | CS_MODE_TRICORE_180 | \ - CS_MODE_LITTLE_ENDIAN), \ + TRICORE_global_init, TRICORE_option, \ + ~(CS_MODE_TRICORE_110 | CS_MODE_TRICORE_120 | \ + CS_MODE_TRICORE_130 | CS_MODE_TRICORE_131 | \ + CS_MODE_TRICORE_160 | CS_MODE_TRICORE_161 | \ + CS_MODE_TRICORE_162 | CS_MODE_TRICORE_180 | \ + CS_MODE_LITTLE_ENDIAN), \ } #define CS_ARCH_CONFIG_ETCA \ { \ - Etca_global_init, \ - Etca_option, \ - ~(0), \ + Etca_global_init, Etca_option, ~(0), \ } #define 
CS_ARCH_CONFIG_ALPHA \ { \ - ALPHA_global_init, \ - ALPHA_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BIG_ENDIAN), \ + ALPHA_global_init, ALPHA_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_BIG_ENDIAN), \ } #define CS_ARCH_CONFIG_LOONGARCH \ { \ - LoongArch_global_init, \ - LoongArch_option, \ - ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_LOONGARCH32 | \ - CS_MODE_LOONGARCH64), \ + LoongArch_global_init, LoongArch_option, \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_LOONGARCH32 | \ + CS_MODE_LOONGARCH64), \ } #define CS_ARCH_CONFIG_XTENSA \ { \ - Xtensa_global_init, \ - Xtensa_option, \ - ~(CS_MODE_XTENSA_ESP32 | CS_MODE_XTENSA_ESP32S2 | \ - CS_MODE_XTENSA_ESP8266), \ + Xtensa_global_init, Xtensa_option, \ + ~(CS_MODE_XTENSA_ESP32 | CS_MODE_XTENSA_ESP32S2 | \ + CS_MODE_XTENSA_ESP8266), \ } #define CS_ARCH_CONFIG_ARC \ { \ - ARC_global_init, \ - ARC_option, \ - ~(CS_MODE_LITTLE_ENDIAN), \ + ARC_global_init, ARC_option, ~(CS_MODE_LITTLE_ENDIAN), \ } #ifdef CAPSTONE_USE_ARCH_REGISTRATION