diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h index 477ab5daf4..c00ce4bba4 100644 --- a/src/dynarec/arm64/dynarec_arm64_private.h +++ b/src/dynarec/arm64/dynarec_arm64_private.h @@ -225,10 +225,6 @@ int Table64(dynarec_arm_t *dyn, uint64_t val, int pass); // add a value to tabl void CreateJmpNext(void* addr, void* next); -// TODO: Save and restore the temp register. -#define SAVE_ACTIVE_SCRATCH_REGISTERS do{} while(0); -#define LOAD_ACTIVE_SCRATCH_REGISTERS do{} while(0); - #define GO_TRACE(A, B, s0) \ GETIP(addr); \ MOVx_REG(x1, xRIP); \ diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c index 75798ae6c8..e1852d62e7 100644 --- a/src/dynarec/dynarec_native_pass.c +++ b/src/dynarec/dynarec_native_pass.c @@ -257,11 +257,15 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int if((trace_end == 0) || ((ip >= trace_start) && (ip < trace_end))) { MESSAGE(LOG_DUMP, "TRACE ----\n"); - if (BOX64ENV(dynarec_nativeflags)) SAVE_ACTIVE_SCRATCH_REGISTERS; + #if defined (SPILL_NF_REGISTERS) /* compiled only when the backend header defines this macro */ + if (BOX64ENV(dynarec_nativeflags)) SPILL_NF_REGISTERS; /* save the scratch registers before the reflect/trace sequence below */ + #endif fpu_reflectcache(dyn, ninst, x1, x2, x3); GO_TRACE(PrintTrace, 1, x5); fpu_unreflectcache(dyn, ninst, x1, x2, x3); - if (BOX64ENV(dynarec_nativeflags)) LOAD_ACTIVE_SCRATCH_REGISTERS; + #if defined (RESTORE_NF_REGISTERS) /* guarded separately from the spill; NOTE(review): a backend is expected to define both or neither */ + if (BOX64ENV(dynarec_nativeflags)) RESTORE_NF_REGISTERS; /* reload the registers saved by SPILL_NF_REGISTERS */ + #endif MESSAGE(LOG_DUMP, "----------\n"); } } diff --git a/src/dynarec/la64/dynarec_la64_private.h b/src/dynarec/la64/dynarec_la64_private.h index e70be9c261..003f62ee2c 100644 --- a/src/dynarec/la64/dynarec_la64_private.h +++ b/src/dynarec/la64/dynarec_la64_private.h @@ -200,9 +200,33 @@ int Table64(dynarec_la64_t *dyn, uint64_t val, int pass); // add a value to tab void CreateJmpNext(void* addr, void* next); -// TODO: Save and restore the temp register. 
-#define SAVE_ACTIVE_SCRATCH_REGISTERS do{} while(0); -#define LOAD_ACTIVE_SCRATCH_REGISTERS do{} while(0); +// Saves/restores the scratch registers on the stack (used around the trace call). +// Traversing forward to the flags-consuming x86 instruction would tell us exactly +// which scratch registers need saving, but that is too complicated, so all of them +// are saved; this adds no noticeable overhead in trace mode. +#define SPILL_NF_REGISTERS \ + do { \ + ADDI_D(xSP, xSP, -64); /* 7 slots * 8 = 56 bytes used; 64 reserved, presumably for 16-byte stack alignment */ \ + ST_D(x1, xSP, 0 * 8); \ + ST_D(x2, xSP, 1 * 8); \ + ST_D(x3, xSP, 2 * 8); \ + ST_D(x4, xSP, 3 * 8); \ + ST_D(x5, xSP, 4 * 8); \ + ST_D(x6, xSP, 5 * 8); \ + ST_D(x7, xSP, 6 * 8); \ + } while(0) /* no trailing ';': the call site supplies it */ + +#define RESTORE_NF_REGISTERS \ + do { \ + LD_D(x1, xSP, 0 * 8); /* same slot layout as SPILL_NF_REGISTERS */ \ + LD_D(x2, xSP, 1 * 8); \ + LD_D(x3, xSP, 2 * 8); \ + LD_D(x4, xSP, 3 * 8); \ + LD_D(x5, xSP, 4 * 8); \ + LD_D(x6, xSP, 5 * 8); \ + LD_D(x7, xSP, 6 * 8); \ + ADDI_D(xSP, xSP, 64); /* release the area reserved by SPILL_NF_REGISTERS */ \ + } while(0) /* no trailing ';': the call site supplies it */ #define GO_TRACE(A, B, s0) \ GETIP(addr, s0); \ diff --git a/src/dynarec/ppc64le/dynarec_ppc64le_private.h b/src/dynarec/ppc64le/dynarec_ppc64le_private.h index e3a1ab85a2..4dfe101d7c 100644 --- a/src/dynarec/ppc64le/dynarec_ppc64le_private.h +++ b/src/dynarec/ppc64le/dynarec_ppc64le_private.h @@ -218,9 +218,33 @@ int Table64(dynarec_ppc64le_t *dyn, uint64_t val, int pass); // add a value to void CreateJmpNext(void* addr, void* next); -// TODO: Save and restore the temp register. -#define SAVE_ACTIVE_SCRATCH_REGISTERS do{} while(0); -#define LOAD_ACTIVE_SCRATCH_REGISTERS do{} while(0); +// Saves/restores the scratch registers on the stack (used around the trace call). +// Traversing forward to the flags-consuming x86 instruction would tell us exactly +// which scratch registers need saving, but that is too complicated, so all of them +// are saved; this adds no noticeable overhead in trace mode. 
+#define SPILL_NF_REGISTERS \ + do { \ + ADDI(xSP, xSP, -64); /* 7 slots * 8 = 56 bytes used, 64 reserved. NOTE(review): slots start at offset 0 of the new stack top - confirm the trace call path sets up its own frame and never writes there */ \ + STD(x1, 0 * 8, xSP); \ + STD(x2, 1 * 8, xSP); \ + STD(x3, 2 * 8, xSP); \ + STD(x4, 3 * 8, xSP); \ + STD(x5, 4 * 8, xSP); \ + STD(x6, 5 * 8, xSP); \ + STD(x7, 6 * 8, xSP); \ + } while(0) /* no trailing ';': the call site supplies it */ + +#define RESTORE_NF_REGISTERS \ + do { \ + LD(x1, 0 * 8, xSP); /* same slot layout as SPILL_NF_REGISTERS */ \ + LD(x2, 1 * 8, xSP); \ + LD(x3, 2 * 8, xSP); \ + LD(x4, 3 * 8, xSP); \ + LD(x5, 4 * 8, xSP); \ + LD(x6, 5 * 8, xSP); \ + LD(x7, 6 * 8, xSP); \ + ADDI(xSP, xSP, 64); /* release the area reserved by SPILL_NF_REGISTERS */ \ + } while(0) /* no trailing ';': the call site supplies it */ #define GO_TRACE(A, B, s0) \ GETIP(addr, s0); \ diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h index 416832a2f7..160f6cec9f 100644 --- a/src/dynarec/rv64/dynarec_rv64_private.h +++ b/src/dynarec/rv64/dynarec_rv64_private.h @@ -216,30 +216,30 @@ int Table64(dynarec_rv64_t *dyn, uint64_t val, int pass); // add a value to tab void CreateJmpNext(void* addr, void* next); -#define SAVE_ACTIVE_SCRATCH_REGISTERS \ - do { \ - uint8_t n1 = dyn->insts[ninst].nat_flags_op1; \ - uint8_t n2 = dyn->insts[ninst].nat_flags_op2; \ - if (IS_SCRATCH(n1) || IS_SCRATCH(n2)) { \ - SUBI(xSP, xSP, 16); \ - if (IS_SCRATCH(n1)) \ - SD(n1, xSP, 0); \ - if (n1 != n2 && IS_SCRATCH(n2)) \ - SD(n2, xSP, 8); \ - } \ +// Saves/restores the scratch registers on the stack (used around the trace call). +// Traversing forward to the flags-consuming x86 instruction would tell us exactly +// which scratch registers need saving, but that is too complicated, so all of them +// are saved; this adds no noticeable overhead in trace mode. 
+#define SPILL_NF_REGISTERS \ + do { \ + SUBI(xSP, xSP, 6 * 8); /* one 8-byte slot per saved register (48 bytes) */ \ + SD(x1, xSP, 0 * 8); \ + SD(x2, xSP, 1 * 8); \ + SD(x3, xSP, 2 * 8); \ + SD(x4, xSP, 3 * 8); \ + SD(x5, xSP, 4 * 8); \ + SD(x6, xSP, 5 * 8); /* NOTE(review): stops at x6 - confirm no further scratch register can hold native-flags operands on rv64 */ \ - } while(0); + } while(0) /* no trailing ';': the call site supplies it */ -#define LOAD_ACTIVE_SCRATCH_REGISTERS \ - do { \ - uint8_t n1 = dyn->insts[ninst].nat_flags_op1; \ - uint8_t n2 = dyn->insts[ninst].nat_flags_op2; \ - if (IS_SCRATCH(n1) || IS_SCRATCH(n2)) { \ - if (IS_SCRATCH(n1)) \ - LD(n1, xSP, 0); \ - if (n1 != n2 && IS_SCRATCH(n2)) \ - LD(n2, xSP, 8); \ - ADDI(xSP, xSP, 16); \ - } \ +#define RESTORE_NF_REGISTERS \ + do { \ + LD(x1, xSP, 0 * 8); /* same slot layout as SPILL_NF_REGISTERS */ \ + LD(x2, xSP, 1 * 8); \ + LD(x3, xSP, 2 * 8); \ + LD(x4, xSP, 3 * 8); \ + LD(x5, xSP, 4 * 8); \ + LD(x6, xSP, 5 * 8); \ + ADDI(xSP, xSP, 6 * 8); /* release the area reserved by SPILL_NF_REGISTERS */ \ - } while(0); + } while(0) /* no trailing ';': the call site supplies it */ #define GO_TRACE(A, B, s0) \