From 7dfa35dbd73a8694c7b6da905cd1acaec4df7210 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 18 Dec 2025 10:24:37 +0000 Subject: [PATCH 01/65] Arm64: [PAC-RET] Add Pointer Authentication support for Arm64 This PR adds support for Pointer Authentication (PAC) on Arm64. Pointer Authentication (PAC) is an Armv8.3+ security feature designed to mitigate Return-Oriented Programming (ROP) attacks by cryptographically signing return addresses. While using PAC, we store a signed return address, instead of the plain address, on the stack and later authenticate it before returning from a function. It ensures control flow returns to the intended caller. More details on PAC and its role in software security can be found ([here](https://llsoftsec.github.io/llsoftsecbook/#sec:pointer-authentication)). - The current implementation of PAC is turned off by default, but can be turned on by setting DOTNET_JitPacEnabled=1. - PAC protects the link register (LR) by signing it in the prolog (using `paciasp`) before it is spilled to the stack, using the current SP as the modifier. It then authenticates the LR in the epilog (using `autiasp`) before the function returns. If the signature is invalid, the execution fails with `SIGILL`. - When the runtime needs to read or overwrite a return address during hijacking for GC, it now strips the PAC (using `xpaclri`) and re-signs the new target address before storing it back. - To simplify tracking the SP in return address hijacking, we avoid using the pre-indexed variant of storing FP/LR on the stack (e.g., `stp fp,lr,[sp,#-framesz]!`). We obtain the value of SP at the time of signing the LR from the location of the current FP. We can't use this approach when the pre-indexed `stp` is used because we don't know the `#framesz`. 
- The updated prolog/epilog sequences generated by the JIT now look like: // Prolog sub sp, sp, #framesz paciasp ; sign LR with A-key + SP stp fp, lr, [sp] // Epilog ldp fp, lr, [sp] autiasp ; authenticate LR add sp, sp, #framesz ret ToDos: [] Restore the original frame layout that used the pre-indexed variant of `stp` to store FP/LR. [] Authenticate the return address instead of stripping it in return address hijacking and unwinding. [] Identify increased binary size for System.*.dll. [] Determine performance regressions using benchmarks such as OrchardCMS. --- src/coreclr/debug/ee/controller.cpp | 10 ++ src/coreclr/inc/cfi.h | 4 +- src/coreclr/inc/clrconfigvalues.h | 1 + src/coreclr/inc/gcinfodecoder.h | 7 +- src/coreclr/jit/codegenarm64.cpp | 143 ++++++++++++++---- src/coreclr/jit/codegenarmarch.cpp | 19 ++- src/coreclr/jit/compiler.h | 1 + src/coreclr/jit/emit.h | 5 + src/coreclr/jit/emitarm64.cpp | 26 ++++ src/coreclr/jit/jitconfigvalues.h | 1 + src/coreclr/jit/unwind.cpp | 5 + src/coreclr/jit/unwindarm64.cpp | 33 ++++ src/coreclr/nativeaot/Runtime/ICodeManager.h | 5 + .../nativeaot/Runtime/StackFrameIterator.cpp | 14 ++ src/coreclr/nativeaot/Runtime/arm64/GcProbe.S | 1 + .../nativeaot/Runtime/arm64/GcProbe.asm | 1 + .../nativeaot/Runtime/arm64/MiscStubs.S | 26 ++++ .../nativeaot/Runtime/arm64/MiscStubs.asm | 24 +++ src/coreclr/nativeaot/Runtime/thread.cpp | 21 ++- .../Runtime/unix/UnixNativeCodeManager.cpp | 78 ++++++++++ .../Runtime/unix/UnixNativeCodeManager.h | 4 + .../Runtime/windows/CoffNativeCodeManager.cpp | 49 ++++++ .../Runtime/windows/CoffNativeCodeManager.h | 2 + .../ObjectWriter/Dwarf/DwarfCfiOpcode.cs | 3 +- .../Compiler/ObjectWriter/Dwarf/DwarfFde.cs | 4 + .../ObjectWriter/Eabi/EabiUnwindConverter.cs | 4 + .../JitInterface/CorInfoImpl.RyuJit.cs | 16 +- src/coreclr/unwinder/arm64/unwinder.cpp | 74 ++++++++- src/coreclr/vm/arm64/asmconstants.h | 6 +- src/coreclr/vm/arm64/asmhelpers.S | 27 ++++ src/coreclr/vm/arm64/asmhelpers.asm | 26 ++++ 
src/coreclr/vm/arm64/cgencpu.h | 3 +- src/coreclr/vm/excep.cpp | 76 ++++++++++ src/coreclr/vm/excep.h | 4 +- src/coreclr/vm/tailcallhelp.cpp | 12 +- src/coreclr/vm/threads.h | 3 +- src/coreclr/vm/threadsuspend.cpp | 28 +++- .../llvm-libunwind/src/DwarfInstructions.hpp | 13 +- 38 files changed, 726 insertions(+), 53 deletions(-) diff --git a/src/coreclr/debug/ee/controller.cpp b/src/coreclr/debug/ee/controller.cpp index 94a2ee700a6365..8763c6cb661443 100644 --- a/src/coreclr/debug/ee/controller.cpp +++ b/src/coreclr/debug/ee/controller.cpp @@ -21,6 +21,10 @@ #include "../../vm/methoditer.h" #include "../../vm/tailcallhelp.h" +#if defined(TARGET_ARM64) +extern "C" void* PacStripPtr(void* ptr); +#endif // TARGET_ARM64 + const char *GetTType( TraceType tt); #define IsSingleStep(exception) ((exception) == EXCEPTION_SINGLE_STEP) @@ -5837,6 +5841,12 @@ static bool IsTailCall(const BYTE * ip, ControllerStackInfo* info, TailCallFunct TailCallTls* tls = GetThread()->GetTailCallTls(); LPVOID tailCallAwareRetAddr = tls->GetFrame()->TailCallAwareReturnAddress; +#if defined(TARGET_ARM64) + //TODO-PAC: Authenticate instead of stripping the return addresses. + retAddr = PacStripPtr(retAddr); + tailCallAwareRetAddr = PacStripPtr(tailCallAwareRetAddr); +#endif // TARGET_ARM64 + LOG((LF_CORDB,LL_INFO1000, "ITCTR: ret addr is %p, tailcall aware ret addr is %p\n", retAddr, tailCallAwareRetAddr)); diff --git a/src/coreclr/inc/cfi.h b/src/coreclr/inc/cfi.h index 3d7ec0f4cc11f8..95a7a0077c8e25 100644 --- a/src/coreclr/inc/cfi.h +++ b/src/coreclr/inc/cfi.h @@ -9,7 +9,9 @@ enum CFI_OPCODE { CFI_ADJUST_CFA_OFFSET, // Offset is adjusted relative to the current one. 
CFI_DEF_CFA_REGISTER, // New register is used to compute CFA - CFI_REL_OFFSET // Register is saved at offset from the current CFA + CFI_REL_OFFSET, // Register is saved at offset from the current CFA + CFI_DEF_CFA, // Take address from register and add offset to it + CFI_NEGATE_RA_STATE, // Sign the return address in lr with paciaz }; struct CFI_CODE diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 89752806ed552d..1efc6a1bcbbeae 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -708,6 +708,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Rcpc, W("EnableArm64Rc RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Rcpc2, W("EnableArm64Rcpc2"), 1, "Allows Arm64 Rcpc2+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Sve, W("EnableArm64Sve"), 1, "Allows Arm64 SVE hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Sve2, W("EnableArm64Sve2"), 1, "Allows Arm64 SVE2 hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_JitPacEnabled, W("JitPacEnabled"), 1, "Allows Arm64 Pointer Authentication (PAC) to be disabled") #elif defined(TARGET_RISCV64) RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableRiscV64Zba, W("EnableRiscV64Zba"), 1, "Allows RiscV64 Zba hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableRiscV64Zbb, W("EnableRiscV64Zbb"), 1, "Allows RiscV64 Zbb hardware intrinsics to be disabled") diff --git a/src/coreclr/inc/gcinfodecoder.h b/src/coreclr/inc/gcinfodecoder.h index 0836dfd0c54b68..ba9de788c29f52 100644 --- a/src/coreclr/inc/gcinfodecoder.h +++ b/src/coreclr/inc/gcinfodecoder.h @@ -76,6 +76,10 @@ typedef void * OBJECTREF; #ifndef __cgencpu_h__ +#if defined(TARGET_ARM64) +extern "C" void* PacStripPtr(void* ptr); +#endif // TARGET_ARM64 + inline void SetIP(T_CONTEXT* context, PCODE rip) { _ASSERTE(!"don't call this"); @@ -105,7 +109,8 @@ inline PCODE GetIP(T_CONTEXT* context) #elif 
defined(TARGET_ARM) return (PCODE)context->Pc; #elif defined(TARGET_ARM64) - return (PCODE)context->Pc; + //TODO-PAC: Authenticate instead of stripping the return address. + return (PCODE) PacStripPtr((void *)context->Pc); #elif defined(TARGET_LOONGARCH64) return (PCODE)context->Pc; #elif defined(TARGET_RISCV64) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 9e760801ca2412..1ee2f19d24cc04 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -209,12 +209,25 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) { case 1: { - // Generate: - // ldp fp,lr,[sp],#framesz - - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, totalFrameSize, - INS_OPTS_POST_INDEX); - compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize); + if (JitConfig.JitPacEnabled() != 0) + { + // Generate: + // ldp fp,lr,[sp] + // autiasp + // add sp, sp, #framesz + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); + compiler->unwindSaveRegPair(REG_FP, REG_LR, 0); + GetEmitter()->emitPacInEpilog(); + genStackPointerAdjustment(totalFrameSize, REG_SCRATCH, nullptr, /* reportUnwindData */ true); + } + else + { + // Generate: + // ldp fp,lr,[sp],#framesz + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, totalFrameSize, + INS_OPTS_POST_INDEX); + compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize); + } break; } @@ -222,12 +235,13 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) { // Generate: // ldp fp,lr,[sp,#outsz] + // autiasp ; if PAC is enabled // add sp,sp,#framesz GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize); compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize); - + GetEmitter()->emitPacInEpilog(); GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, 
REG_SPBASE, totalFrameSize); compiler->unwindAllocStack(totalFrameSize); break; @@ -481,11 +495,28 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, assert(!useSaveNextPair); if ((spOffset == 0) && (spDelta >= -512)) { - // We can use pre-indexed addressing. - // stp REG, REG + 1, [SP, #spDelta]! - // 64-bit STP offset range: -512 to 504, multiple of 8. - GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_PRE_INDEX); - compiler->unwindSaveRegPairPreindexed(reg1, reg2, spDelta); + // We can use pre-indexed addressing when pointer authentication PAC is disabled. + assert(reg1 != REG_LR); + if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR)) + { + // Generate: + // sub SP, SP, #spDelta + // paciasp + // stp REG, REG + 1, [SP] + assert(reg1 == REG_FP); + genStackPointerAdjustment(spDelta, REG_NA, nullptr, /* reportUnwindData */ true); + GetEmitter()->emitPacInProlog(); + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); + compiler->unwindSaveRegPair(REG_FP, REG_LR, 0); + } + else + { + // Generate: + // stp REG, REG + 1, [SP, #spDelta]! + // 64-bit STP offset range: -512 to 504, multiple of 8. + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_PRE_INDEX); + compiler->unwindSaveRegPairPreindexed(reg1, reg2, spDelta); + } needToSaveRegs = false; } @@ -505,6 +536,13 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, // 64-bit STP offset range: -512 to 504, multiple of 8. 
assert(spOffset <= 504); assert((spOffset % 8) == 0); + + assert(reg1 != REG_LR); + if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR)) + { + assert(reg1 == REG_FP); + GetEmitter()->emitPacInProlog(); + } GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); if (TargetOS::IsUnix && compiler->generateCFIUnwindCodes()) @@ -613,16 +651,30 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both // FP/SIMD + assert(reg1 != REG_LR); if (spDelta != 0) { assert(!useSaveNextPair); if ((spOffset == 0) && (spDelta <= 504)) { - // Fold the SP change into this instruction. - // ldp reg1, reg2, [SP], #spDelta - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_POST_INDEX); - compiler->unwindSaveRegPairPreindexed(reg1, reg2, -spDelta); + if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR)) + { + // We separate SP increment and loading FP/LR when PAC is enabled. + assert(reg1 == REG_FP); + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, 0); + compiler->unwindSaveRegPair(reg1, reg2, 0); + GetEmitter()->emitPacInEpilog(); + genStackPointerAdjustment(spDelta, REG_NA, nullptr, /* reportUnwindData */ true); + } + else + { + // Fold the SP change into this instruction. 
+ // ldp reg1, reg2, [SP], #spDelta + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, + INS_OPTS_POST_INDEX); + compiler->unwindSaveRegPairPreindexed(reg1, reg2, -spDelta); + } } else // (spOffset != 0) || (spDelta > 504) { @@ -632,6 +684,12 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); compiler->unwindSaveRegPair(reg1, reg2, spOffset); + if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR)) + { + assert(reg1 == REG_FP); + GetEmitter()->emitPacInEpilog(); + } + // generate add SP,SP,imm genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); } @@ -640,6 +698,12 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, { GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); + if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR)) + { + assert(reg1 == REG_FP); + GetEmitter()->emitPacInEpilog(); + } + if (TargetOS::IsUnix && compiler->generateCFIUnwindCodes()) { useSaveNextPair = false; @@ -1375,14 +1439,26 @@ void CodeGen::genFuncletProlog(BasicBlock* block) bool scratchRegIsZero = false; genAllocLclFrame(-genFuncletInfo.fiSpDelta1, REG_SCRATCH, &scratchRegIsZero, maskArgRegsLiveIn); genStackPointerAdjustment(genFuncletInfo.fiSpDelta1, REG_SCRATCH, nullptr, /* reportUnwindData */ true); + GetEmitter()->emitPacInProlog(); GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); compiler->unwindSaveRegPair(REG_FP, REG_LR, 0); } else { - GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1, - INS_OPTS_PRE_INDEX); - compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1); + if (JitConfig.JitPacEnabled() != 0) + { + // generate sub SP,SP,imm + genStackPointerAdjustment(genFuncletInfo.fiSpDelta1, REG_NA, nullptr, /* reportUnwindData */ true); + 
GetEmitter()->emitPacInProlog(); + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); + compiler->unwindSaveRegPair(REG_FP, REG_LR, 0); + } + else + { + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, + genFuncletInfo.fiSpDelta1, INS_OPTS_PRE_INDEX); + compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1); + } } maskSaveRegsInt &= ~(RBM_LR | RBM_FP); // We've saved these now @@ -1401,6 +1477,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) assert(genFuncletInfo.fiSpDelta2 == 0); + GetEmitter()->emitPacInProlog(); GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSP_to_FPLR_save_delta); compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta); @@ -1409,13 +1486,15 @@ void CodeGen::genFuncletProlog(BasicBlock* block) } else if (genFuncletInfo.fiFrameType == 3) { - if (compiler->opts.IsOSR()) + // Avoid using pre-indexed store when PAC is enabled. + if ((compiler->opts.IsOSR()) || (JitConfig.JitPacEnabled() != 0)) { // With OSR we may see large values for fiSpDelta1 // We repurpose genAllocLclFram to do the necessary probing. 
bool scratchRegIsZero = false; genAllocLclFrame(-genFuncletInfo.fiSpDelta1, REG_SCRATCH, &scratchRegIsZero, maskArgRegsLiveIn); genStackPointerAdjustment(genFuncletInfo.fiSpDelta1, REG_SCRATCH, nullptr, /* reportUnwindData */ true); + GetEmitter()->emitPacInProlog(); GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); compiler->unwindSaveRegPair(REG_FP, REG_LR, 0); } @@ -1545,12 +1624,21 @@ void CodeGen::genFuncletEpilog() { // With OSR we may see large values for fiSpDelta1 // - if (compiler->opts.IsOSR()) + if (compiler->opts.IsOSR() || (JitConfig.JitPacEnabled() != 0)) { - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); - compiler->unwindSaveRegPair(REG_FP, REG_LR, 0); + ssize_t imm = 0; + regNumber tempReg = REG_SCRATCH; - genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, REG_SCRATCH, nullptr, /* reportUnwindData */ true); + if (JitConfig.JitPacEnabled() != 0) + { + imm = genFuncletInfo.fiSP_to_FPLR_save_delta; + tempReg = REG_NA; + } + + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, imm); + compiler->unwindSaveRegPair(REG_FP, REG_LR, imm); + GetEmitter()->emitPacInEpilog(); + genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, tempReg, nullptr, /* reportUnwindData */ true); } else { @@ -1567,6 +1655,7 @@ void CodeGen::genFuncletEpilog() GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSP_to_FPLR_save_delta); compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta); + GetEmitter()->emitPacInEpilog(); // fiFrameType==2 constraints: assert(genFuncletInfo.fiSpDelta1 < 0); @@ -1580,12 +1669,12 @@ void CodeGen::genFuncletEpilog() else if (genFuncletInfo.fiFrameType == 3) { // With OSR we may see large values for fiSpDelta1 - // - if (compiler->opts.IsOSR()) + // Avoid post-indexed load when PAC is enabled. 
+ if (compiler->opts.IsOSR() || (JitConfig.JitPacEnabled() != 0)) { GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); compiler->unwindSaveRegPair(REG_FP, REG_LR, 0); - + GetEmitter()->emitPacInEpilog(); genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, REG_SCRATCH, nullptr, /* reportUnwindData */ true); } else diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 1849c5496126f9..08d0a254a75125 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -4543,9 +4543,20 @@ void CodeGen::genPushCalleeSavedRegisters() assert(totalFrameSize <= STACK_PROBE_BOUNDARY_THRESHOLD_BYTES); - GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize, - INS_OPTS_PRE_INDEX); - compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize); + if (JitConfig.JitPacEnabled() != 0) + { + // Avoid pre-indexed variant of store to save FP/LR when PAC is enabled. 
+ genStackPointerAdjustment(-totalFrameSize, REG_SCRATCH, nullptr, /* reportUnwindData */ true); + GetEmitter()->emitPacInProlog(); + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); + compiler->unwindSaveRegPair(REG_FP, REG_LR, 0); + } + else + { + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize, + INS_OPTS_PRE_INDEX); + compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize); + } maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR @@ -4596,6 +4607,7 @@ void CodeGen::genPushCalleeSavedRegisters() assert(compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize); + GetEmitter()->emitPacInProlog(); GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize); compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize); @@ -4710,6 +4722,7 @@ void CodeGen::genPushCalleeSavedRegisters() } else { + GetEmitter()->emitPacInProlog(); // No frame pointer (no chaining). assert((maskSaveRegsInt & RBM_FP) == 0); assert((maskSaveRegsInt & RBM_LR) != 0); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 5130ca5eb122d6..013cca31189118 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8832,6 +8832,7 @@ class Compiler void unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset); // stp reg1, reg2, [sp, #offset] void unwindSaveRegPairPreindexed(regNumber reg1, regNumber reg2, int offset); // stp reg1, reg2, [sp, #offset]! 
void unwindSaveNext(); // unwind code: save_next + void unwindPacSignLR(); // unwind code: pac_sign_lr void unwindReturn(regNumber reg); // ret lr #endif // defined(TARGET_ARM64) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 2c843f03483d74..0c022e634f40ca 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -3291,6 +3291,11 @@ class emitter instrDescAlign* emitNewInstrAlign(); #endif +#if defined(TARGET_ARM64) + void emitPacInProlog(); + void emitPacInEpilog(); +#endif + instrDesc* emitNewInstrSmall(emitAttr attr); instrDesc* emitNewInstr(emitAttr attr = EA_4BYTE); instrDesc* emitNewInstrSC(emitAttr attr, cnsval_ssize_t cns); diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index e88e10568712d3..a733349ca5be97 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1397,6 +1397,32 @@ static const char * const bRegNames[] = // clang-format on +//------------------------------------------------------------------------ +// emitPacInProlog: Emit `paciasp` to sign LR and record the pac_sign_lr unwind code; does nothing when JitPacEnabled is 0 +// +void emitter::emitPacInProlog() +{ + if (JitConfig.JitPacEnabled() == 0) + { + return; + } + emitIns(INS_paciasp); + emitComp->unwindPacSignLR(); +} + +//------------------------------------------------------------------------ +// emitPacInEpilog: Emit `autiasp` to authenticate LR and record the pac_sign_lr unwind code; does nothing when JitPacEnabled is 0 +// +void emitter::emitPacInEpilog() +{ + if (JitConfig.JitPacEnabled() == 0) + { + return; + } + emitIns(INS_autiasp); + emitComp->unwindPacSignLR(); +} + //------------------------------------------------------------------------ // emitRegName: Returns a general-purpose register name or SIMD and floating-point scalar register name. 
// diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index eb675224874412..0f4223ea0be9ed 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -125,6 +125,7 @@ CONFIG_STRING(JitInlineMethodsWithEHRange, "JitInlineMethodsWithEHRange") CONFIG_INTEGER(JitLongAddress, "JitLongAddress", 0) // Force using the large pseudo instruction form for long address CONFIG_INTEGER(JitMaxUncheckedOffset, "JitMaxUncheckedOffset", 8) +RELEASE_CONFIG_INTEGER(JitPacEnabled, "JitPacEnabled", 1) // // MinOpts diff --git a/src/coreclr/jit/unwind.cpp b/src/coreclr/jit/unwind.cpp index fac8165008b0bf..5b9bdeabbe04af 100644 --- a/src/coreclr/jit/unwind.cpp +++ b/src/coreclr/jit/unwind.cpp @@ -413,6 +413,11 @@ void Compiler::DumpCfiInfo(bool isHotCode, assert(dwarfReg == DWARF_REG_ILLEGAL); printf(" CodeOffset: 0x%02X Op: AdjustCfaOffset Offset:0x%X\n", codeOffset, offset); break; + case CFI_NEGATE_RA_STATE: + assert(dwarfReg == DWARF_REG_ILLEGAL); + assert(offset == 0); + printf(" CodeOffset: 0x%02X Op: NegateRAState\n", codeOffset); + break; default: printf(" Unrecognized CFI_CODE: 0x%llX\n", *(UINT64*)pCode); break; diff --git a/src/coreclr/jit/unwindarm64.cpp b/src/coreclr/jit/unwindarm64.cpp index f842737171c0b4..c950f50a11548b 100644 --- a/src/coreclr/jit/unwindarm64.cpp +++ b/src/coreclr/jit/unwindarm64.cpp @@ -635,6 +635,33 @@ void Compiler::unwindSaveNext() pu->AddCode(0xE6); } +void Compiler::unwindPacSignLR() +{ + if (JitConfig.JitPacEnabled() == 0) + { + return; + } +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + FuncInfoDsc* func = funCurrentFunc(); + UNATIVE_OFFSET cbProlog = 0; + if (compGeneratingProlog) + { + cbProlog = unwindGetCurrentOffset(func); + } + + // Maps to DW_CFA_AARCH64_negate_ra_state + createCfiCode(func, cbProlog, CFI_NEGATE_RA_STATE, DWARF_REG_ILLEGAL); + + return; + } +#endif // FEATURE_CFI_SUPPORT + + // pac_sign_lr: 11111100: sign the return address in lr with 
paciasp + funCurrentFunc()->uwi.AddCode(0xFC); +} + void Compiler::unwindReturn(regNumber reg) { // Nothing to do; we will always have at least one trailing "end" opcode in our padding. @@ -1081,6 +1108,12 @@ void DumpUnwindInfo(Compiler* comp, printf(" %02X save_next\n", b1); } + else if (b1 == 0xFC) + { + // pac_sign_lr: 11111100 : sign the return address in lr with paciasp. + + printf(" %02X pac_sign_lr\n", b1); + } else { // Unknown / reserved unwind code diff --git a/src/coreclr/nativeaot/Runtime/ICodeManager.h b/src/coreclr/nativeaot/Runtime/ICodeManager.h index a508aad49f769e..91b7e3c2b16ba1 100644 --- a/src/coreclr/nativeaot/Runtime/ICodeManager.h +++ b/src/coreclr/nativeaot/Runtime/ICodeManager.h @@ -161,6 +161,11 @@ class ICodeManager virtual bool IsUnwindable(PTR_VOID pvAddress) PURE_VIRTUAL +#ifdef TARGET_ARM64 + virtual bool IsPacPresent(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet ) PURE_VIRTUAL +#endif + virtual bool GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in PTR_PTR_VOID * ppvRetAddrLocation // out diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index 5762e817a88904..5ec950269cdfb2 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -65,6 +65,10 @@ EXTERN_C CODE_LOCATION RhpRethrow2; #define FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY(msg) { ASSERT_UNCONDITIONALLY(msg); RhFailFast(); } #endif +#if defined(TARGET_ARM64) +extern "C" void* PacStripPtr(void* ptr); +#endif // TARGET_ARM64 + StackFrameIterator::StackFrameIterator(Thread * pThreadToWalk, PInvokeTransitionFrame* pInitialTransitionFrame) { STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init---- [ GC ]\n"); @@ -1779,7 +1783,12 @@ void StackFrameIterator::NextInternal() // if the thread is safe to walk, it better not have a hijack in place. 
ASSERT(!m_pThread->IsHijacked()); +#if defined(TARGET_ARM64) + // TODO-PAC: Authenticate instead of stripping the return address. + SetControlPC(PacStripPtr(dac_cast(PCODEToPINSTR(m_RegDisplay.GetIP())))); +#else SetControlPC(dac_cast(PCODEToPINSTR(m_RegDisplay.GetIP()))); +#endif // TARGET_ARM64 PTR_VOID collapsingTargetFrame = NULL; @@ -2110,6 +2119,11 @@ void StackFrameIterator::CalculateCurrentMethodState() return; } +#if defined(TARGET_ARM64) + //TODO-PAC: Authenticate instead of stripping the return addresses. + m_ControlPC = PacStripPtr(m_ControlPC); +#endif // TARGET_ARM64 + // Assume that the caller is likely to be in the same module if (m_pCodeManager == NULL || !m_pCodeManager->FindMethodInfo(m_ControlPC, &m_methodInfo)) { diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S index fe0077f7a54c2e..3439e9d69e74c7 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S @@ -107,6 +107,7 @@ // Fix the stack by restoring the original return address // ldr lr, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + xpaclri // // Clear hijack state diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm index f28ad05e86fcd4..1469c3a0949b91 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm @@ -116,6 +116,7 @@ PROBE_FRAME_SIZE field 0 ;; Fix the stack by restoring the original return address ;; ldr lr, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + DCD 0xD50320FF ;; xpaclri instruction in binary to avoid error while compiling with non-PAC enabled compilers ;; ;; Clear hijack state diff --git a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S index ea5d91a1a1c1f9..2b020360e17700 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S +++ 
b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S @@ -3,3 +3,29 @@ #include #include "AsmOffsets.inc" + +// void* PacStripPtr(void *); +// This function strips the PAC info from the pointer that is passed as an argument. +// To avoid failing on non-PAC enabled machines, we use xpaclri (instead of xpaci) which strips lr explicitly. +// Thus we need to move the input into lr, strip it and copy it back to the result register; x9 keeps the real return address. +.arch_extension pauth + LEAF_ENTRY PacStripPtr, _TEXT + mov x9, lr + mov lr, x0 + xpaclri + mov x0, lr + ret x9 + LEAF_END PacStripPtr, _TEXT + +// void* PacSignPtr(void *, void *); +// This function signs the pointer passed as the first argument with the A key, using the second argument as the modifier. +// To avoid failing on non-PAC enabled machines, we use pacia1716 (instead of pacia), which is encoded in the HINT space and operates on fixed registers. +// Thus we need to move the pointer into x17 and the modifier into x16, sign, and then copy x17 back to the result register. +.arch_extension pauth + LEAF_ENTRY PacSignPtr, _TEXT + mov x17, x0 + mov x16, x1 + pacia1716 + mov x0, x17 + ret + LEAF_END PacSignPtr, _TEXT \ No newline at end of file diff --git a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm index 49baea4977259b..0b41103f9ef30c 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm @@ -5,4 +5,28 @@ TEXTAREA +; void* PacStripPtr(void *); +; This function strips the PAC info from the pointer that is passed as an argument. +; To avoid failing on non-PAC enabled machines, we use xpaclri (instead of xpaci) which strips lr explicitly. +; Thus we need to move the input into lr, strip it and copy it back to the result register; x9 keeps the real return address. + LEAF_ENTRY PacStripPtr + mov x9, lr + mov lr, x0 + DCD 0xD50320FF ; xpaclri instruction in binary to avoid error while compiling with non-PAC enabled compilers + mov x0, lr + ret x9 + LEAF_END PacStripPtr + +; void* PacSignPtr(void *, void *); +; This function signs the pointer passed as the first argument with the A key, using the second argument as the modifier. 
+; To avoid failing on non-PAC enabled machines, we use pacia1716 (instead of pacia), which is encoded in the HINT space and operates on fixed registers. +; Thus we need to move the pointer into x17 and the modifier into x16, sign, and then copy x17 back to the result register. + LEAF_ENTRY PacSignPtr + mov x17, x0 + mov x16, x1 + DCD 0xD503211F ; pacia1716 instruction in binary to avoid error while compiling with non-PAC enabled compilers + mov x0, x17 + ret + LEAF_END PacSignPtr + end diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index fe65344da8f791..5f85e117e9aca1 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -37,6 +37,11 @@ static Thread* g_RuntimeInitializingThread; #endif //!DACCESS_COMPILE +#if defined(TARGET_ARM64) +extern "C" void* PacSignPtr(void* ptr, void* sp); +extern "C" void* PacStripPtr(void* ptr); +#endif // TARGET_ARM64 + ee_alloc_context::PerThreadRandom::PerThreadRandom() { minipal_xoshiro128pp_init(&random_state, (uint32_t)minipal_hires_ticks()); @@ -806,8 +811,15 @@ void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, Hijack CrossThreadUnhijack(); void* pvRetAddr = *ppvRetAddrLocation; + ASSERT(pvRetAddr != NULL); + +#if defined(TARGET_ARM64) + //TODO-PAC: Authenticate instead of stripping the return addresses. 
+ ASSERT(StackFrameIterator::IsValidReturnAddress(PacStripPtr(pvRetAddr))); +#else ASSERT(StackFrameIterator::IsValidReturnAddress(pvRetAddr)); +#endif // TARGET_ARM64 m_ppvHijackedReturnAddressLocation = ppvRetAddrLocation; m_pvHijackedReturnAddress = pvRetAddr; @@ -817,7 +829,14 @@ void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, Hijack frameIterator->GetRegisterSet())); #endif - *ppvRetAddrLocation = (void*)pfnHijackFunction; + void* pvHijackedAddr = (void*)pfnHijackFunction; +#if defined(TARGET_ARM64) + if (frameIterator->GetCodeManager()->IsPacPresent(frameIterator->GetMethodInfo(), frameIterator->GetRegisterSet())) + { + pvHijackedAddr = PacSignPtr(pvHijackedAddr, (void*)frameIterator->GetRegisterSet()->pFP); + } +#endif // TARGET_ARM64 + *ppvRetAddrLocation = pvHijackedAddr; STRESS_LOG2(LF_STACKWALK, LL_INFO10000, "InternalHijack: TgtThread = %llx, IP = %p\n", GetOSThreadId(), frameIterator->GetRegisterSet()->GetIP()); diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index 266b56bd1f6e4e..ea0f6ca0b22746 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -64,6 +64,84 @@ UnixNativeCodeManager::~UnixNativeCodeManager() { } +#if defined(TARGET_ARM64) +static size_t readULEB(const uint8_t *&p, const uint8_t *end) +{ + size_t result = 0; + unsigned shift = 0; + while (p < end) { + uint8_t byte = *p++; + result |= size_t(byte & 0x7F) << shift; + if ((byte & 0x80) == 0) // clear top bit indicates the last by of the value + break; + shift += 7; + } + return result; +} + +bool UnixNativeCodeManager::IsPacPresent(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet) +{ + UnixNativeMethodInfo* pNativeMethodInfo = (UnixNativeMethodInfo*)pMethodInfo; + const uint8_t *p = (uint8_t *) pNativeMethodInfo->unwind_info; + const uint8_t *end = p + *((uint32_t *)p); + p += 4; 
// Skip length + assert(*((uint32_t *)p) != 0); // Ensure it's FDE entry + p += 4; // Skip offset to CIE + p += 4; // Skip PC start + p += 4; // Skip function length + size_t augmentationLength = readULEB(p, end); + p += augmentationLength; // skip augmentation data + + while (p < end) { + uint8_t op = *p++; + + if (op == DW_CFA_AARCH64_negate_ra_state) + { + return true; + } + + if ((op & 0xC0) == DW_CFA_advance_loc) + { + continue; + } + if ((op & ~(0x3F)) == DW_CFA_offset) + { + readULEB(p, end); // offset + continue; + } + + // Extended, single‐byte opcodes: + switch (op) { + case DW_CFA_advance_loc1: + case DW_CFA_def_cfa_register: + p++; // offset + break; + + case DW_CFA_offset_extended_sf: + case DW_CFA_offset_extended: + readULEB(p, end); // register + readULEB(p, end); // offset + break; + + case DW_CFA_def_cfa_offset: // DW_CFA_def_cfa_offset + readULEB(p, end); // offset + break; + + case DW_CFA_def_cfa: // DW_CFA_def_cfa + p++; // register + readULEB(p, end); // offset + break; + + default: // Unknown unwind op code + //TODO-PAC: Handle unknown op codes correctly. return false/assert false? 
+ p++; + } + } + return false; +} +#endif // TARGET_ARM64 + // Virtually unwind stack to the caller of the context specified by the REGDISPLAY bool UnixNativeCodeManager::VirtualUnwind(MethodInfo* pMethodInfo, REGDISPLAY* pRegisterSet) { diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h index ca3f3f2272bde1..be455860b3277e 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h @@ -73,6 +73,10 @@ class UnixNativeCodeManager : public ICodeManager REGDISPLAY * pRegisterSet, // in PTR_PTR_VOID * ppvRetAddrLocation); // out +#if defined(TARGET_ARM64) + bool IsPacPresent(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet); +#endif // TARGET_ARM64 + PTR_VOID RemapHardwareFaultToGCSafePoint(MethodInfo * pMethodInfo, PTR_VOID controlPC); bool EHEnumInit(MethodInfo * pMethodInfo, PTR_VOID * pMethodStartAddress, EHEnumState * pEHEnumState); diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp index ec315adbd9e39c..a95a7a0b125fd9 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -831,6 +831,55 @@ bool CoffNativeCodeManager::IsUnwindable(PTR_VOID pvAddress) return true; } +#if defined(TARGET_ARM64) +bool CoffNativeCodeManager::IsPacPresent(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet) +{ + CoffNativeMethodInfo * pNativeMethodInfo = (CoffNativeMethodInfo *)pMethodInfo; + + size_t unwindDataBlobSize; + + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pNativeMethodInfo->runtimeFunction, &unwindDataBlobSize); + + PTR_uint8_t UnwindCodePtr = dac_cast(pUnwindDataBlob); + PTR_uint8_t UnwindCodesEndPtr = dac_cast(pUnwindDataBlob) + unwindDataBlobSize; + + while (UnwindCodePtr < UnwindCodesEndPtr) + { + uint8_t 
CurCode = * UnwindCodePtr; + if ((CurCode & 0xfe) == 0xe4) // The last unwind code + { + break; + } + + if (CurCode == 0xFC) // Unwind code for PAC (pac_sign_lr) + { + return true; + } + + if (CurCode < 0xC0) + { + UnwindCodePtr += 1; + } + else if (CurCode < 0xE0) + { + UnwindCodePtr += 2; + } + else + { + static const BYTE UnwindCodeSizeTable[32] = + { + 4,1,2,1,1,1,1,3, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 2,3,4,5,1,1,1,1 + }; + + UnwindCodePtr += UnwindCodeSizeTable[CurCode - 0xE0]; + } + } + + return false; +} +#endif //TARGET_ARM64 + bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in PTR_PTR_VOID * ppvRetAddrLocation) // out diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h index c85f5250967793..0f54e591f95298 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h @@ -90,6 +90,8 @@ class CoffNativeCodeManager : public ICodeManager bool IsUnwindable(PTR_VOID pvAddress); + bool IsPacPresent(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet); + bool GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in PTR_PTR_VOID * ppvRetAddrLocation); // out diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCfiOpcode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCfiOpcode.cs index 25d81deb39aabc..1e3a2a1f37b23e 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCfiOpcode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCfiOpcode.cs @@ -11,6 +11,7 @@ internal enum CFI_OPCODE CFI_ADJUST_CFA_OFFSET, // Offset is adjusted relative to the current one. 
CFI_DEF_CFA_REGISTER, // New register is used to compute CFA CFI_REL_OFFSET, // Register is saved at offset from the current CFA - CFI_DEF_CFA // Take address from register and add offset to it. + CFI_DEF_CFA, // Take address from register and add offset to it. + CFI_NEGATE_RA_STATE, // Toggles whether the return address in lr is PAC-signed (DW_CFA_AARCH64_negate_ra_state) } } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfFde.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfFde.cs index f405fddde48a6b..8c98a416d63e6b 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfFde.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfFde.cs @@ -113,6 +113,10 @@ private static byte[] CfiCodeToInstructions(DwarfCie cie, byte[] blobData) cfaOffset = cfiOffset; cfiCodeOffset += DwarfHelper.WriteULEB128(cfiCode.AsSpan(cfiCodeOffset), (uint)cfaOffset); break; + + case CFI_OPCODE.CFI_NEGATE_RA_STATE: + cfiCode[cfiCodeOffset++] = DW_CFA_AARCH64_negate_ra_state; + break; } } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Eabi/EabiUnwindConverter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Eabi/EabiUnwindConverter.cs index 5db4b2cfeb7e25..f5d6372535a9bd 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Eabi/EabiUnwindConverter.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Eabi/EabiUnwindConverter.cs @@ -121,6 +121,10 @@ public static byte[] ConvertCFIToEabi(byte[] blobData) EmitSpAdjustment(cfiOffset); } break; + + case CFI_OPCODE.CFI_NEGATE_RA_STATE: + // Do nothing here.
+ break; } } diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs index d1508e4f738828..205d96c4ab9b28 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs @@ -93,7 +93,8 @@ private enum CFI_OPCODE CFI_ADJUST_CFA_OFFSET, // Offset is adjusted relative to the current one. CFI_DEF_CFA_REGISTER, // New register is used to compute CFA CFI_REL_OFFSET, // Register is saved at offset from the current CFA - CFI_DEF_CFA // Take address from register and add offset to it. + CFI_DEF_CFA, // Take address from register and add offset to it. + CFI_NEGATE_RA_STATE, // Toggles whether the return address in lr is PAC-signed (DW_CFA_AARCH64_negate_ra_state) } // Get the CFI data in the same shape as clang/LLVM generated one. This improves the compatibility with libunwind and other unwind solutions @@ -124,6 +125,7 @@ private static byte[] CompressARM64CFI(byte[] blobData) } int offset = 0; + bool shouldAddPACOpCode = false; while (offset < blobData.Length) { codeOffset = Math.Max(codeOffset, blobData[offset++]); @@ -177,6 +179,10 @@ private static byte[] CompressARM64CFI(byte[] blobData) } } break; + + case CFI_OPCODE.CFI_NEGATE_RA_STATE: + shouldAddPACOpCode = true; + break; } } @@ -186,6 +192,14 @@ private static byte[] CompressARM64CFI(byte[] blobData) using (BinaryWriter cfiWriter = new BinaryWriter(cfiStream)) { + if (shouldAddPACOpCode) + { + cfiWriter.Write((byte)codeOffset); + cfiWriter.Write((byte)CFI_OPCODE.CFI_NEGATE_RA_STATE); + cfiWriter.Write((short)-1); + cfiWriter.Write(cfaOffset); + } + if (cfaRegister != -1) { cfiWriter.Write((byte)codeOffset); diff --git a/src/coreclr/unwinder/arm64/unwinder.cpp b/src/coreclr/unwinder/arm64/unwinder.cpp index f76a97c67f88ec..68433f05d342cf 100644 --- a/src/coreclr/unwinder/arm64/unwinder.cpp +++ b/src/coreclr/unwinder/arm64/unwinder.cpp @@ -31,6 +31,10
@@ #define FIELD_OFFSET(type, field) ((LONG)__builtin_offsetof(type, field)) #endif +#if !defined(DACCESS_COMPILE) && !defined(FEATURE_CDAC_UNWINDER) +extern "C" void* PacStripPtr(void* ptr); +#endif // !defined(DACCESS_COMPILE) && !defined(FEATURE_CDAC_UNWINDER) + #ifdef HOST_UNIX #define RtlZeroMemory ZeroMemory @@ -251,16 +255,71 @@ do { #endif // !defined(DEBUGGER_UNWIND) -// // Macros for stripping pointer authentication (PAC) bits. -// +#if !defined(DACCESS_COMPILE) && !defined(FEATURE_CDAC_UNWINDER) -#if !defined(DEBUGGER_STRIP_PAC) +#define STRIP_PAC(pointer) RtlStripPacOnline(pointer) -// NOTE: Pointer authentication is not used by .NET, so the implementation does nothing -#define STRIP_PAC(Params, pointer) +FORCEINLINE +VOID RtlStripPacOnline(_Inout_ PULONG64 Pointer) -#endif +/*++ + +Routine Description: + + This routine strips the ARM64 Pointer Authentication Code (PAC) from a + pointer using the ARM64-native xpaci intrinsic directly. Hence this should + only be called when stripping a pointer at runtime (not debugger) + +Arguments: + + Pointer - Supplies a pointer to the pointer whose PAC will be stripped. + +Return Value: + + None. + +--*/ + +{ + *Pointer = (ULONG64)PacStripPtr((void *) (*Pointer)); +} +#else + +#define STRIP_PAC(pointer) RtlStripPacManual(pointer) + +FORCEINLINE +VOID +RtlStripPacManual( + _Inout_ PULONG64 Pointer +) +/*++ + +Routine Description: + + This routine manually strips the ARM64 Pointer Authentication Code (PAC) + from a pointer. This is functionally similar to the XPAC family of + instructions. + + N.B. Even though PAC is only supported on ARM64, this routine is available + on all architectures to conveniently enable scenarios such as the + Debugger. + +Arguments: + + Pointer - Supplies a pointer to the pointer whose PAC will be stripped. + +Return Value: + + None. 
+ +--*/ +{ + *Pointer &= 0x0000FFFFFFFFFFFF; + return; +} + +#endif // !defined(DACCESS_COMPILE) && !defined(FEATURE_CDAC_UNWINDER) // // Macros to clarify opcode parsing @@ -2343,7 +2402,8 @@ Return Value: return STATUS_UNWIND_INVALID_SEQUENCE; } - STRIP_PAC(UnwindParams, &ContextRecord->Lr); + // TODO-PAC: Authenticate instead of stripping the return address. + STRIP_PAC(&ContextRecord->Lr); // // TODO: Implement support for UnwindFlags RTL_VIRTUAL_UNWIND2_VALIDATE_PAC. diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h index c98dde2892ff84..ef46c3153888b1 100644 --- a/src/coreclr/vm/arm64/asmconstants.h +++ b/src/coreclr/vm/arm64/asmconstants.h @@ -306,12 +306,12 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__InterpMethod__pCallStub == offsetof(InterpMethod #ifdef TARGET_UNIX #ifdef _DEBUG -#define OFFSETOF__Thread__m_pInterpThreadContext 0xb30 +#define OFFSETOF__Thread__m_pInterpThreadContext 0xb38 #else // _DEBUG -#define OFFSETOF__Thread__m_pInterpThreadContext 0x2c8 +#define OFFSETOF__Thread__m_pInterpThreadContext 0x2d0 #endif // _DEBUG #else // TARGET_UNIX -#define OFFSETOF__Thread__m_pInterpThreadContext 0xb58 +#define OFFSETOF__Thread__m_pInterpThreadContext 0xb60 #endif // TARGET_UNIX ASMCONSTANTS_C_ASSERT(OFFSETOF__Thread__m_pInterpThreadContext == offsetof(Thread, m_pInterpThreadContext)) diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 93abb4d13fe60f..04747f8bdd6ba5 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -161,11 +161,38 @@ NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler EPILOG_RESTORE_REG_PAIR x25, x26, 64 EPILOG_RESTORE_REG_PAIR x27, x28, 80 EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 192 + xpaclri EPILOG_RETURN NESTED_END OnHijackTripThread, _TEXT #endif // FEATURE_HIJACK +// void* PacStripPtr(void *); +// This function strips the pointer of PAC info that is passed as an agrument. 
+// To avoid failing on non-PAC enabled machines, we use xpaclri (instead of xpaci) which strips lr explicitly. +// Thus we need to move the input into lr, strip it and copy it back to the result register. +.arch_extension pauth + LEAF_ENTRY PacStripPtr, _TEXT + mov x9, lr + mov lr, x0 + xpaclri + mov x0, lr + ret x9 + LEAF_END PacStripPtr, _TEXT + +// void* PacSignPtr(void *, void *); +// This function signs the input pointer (first argument) with the A key, using the second argument as the modifier. +// To avoid failing on non-PAC enabled machines, we use pacia1716 (instead of pacia) which signs x17 using x16 as the modifier. +// Thus we need to move the inputs into x17 and x16, sign x17 and then copy it back to the result register. +.arch_extension pauth + LEAF_ENTRY PacSignPtr, _TEXT + mov x17, x0 + mov x16, x1 + pacia1716 + mov x0, x17 + ret + LEAF_END PacSignPtr, _TEXT + // ------------------------------------------------------------------ // Redirection Stub for GC in fully interruptible method //GenerateRedirectedHandledJITCaseStub GCThreadControl diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index c501e0b50fa24a..865e1693032a23 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -496,11 +496,37 @@ COMToCLRDispatchHelper_RegSetup EPILOG_RESTORE_REG_PAIR x25, x26, #64 EPILOG_RESTORE_REG_PAIR x27, x28, #80 EPILOG_RESTORE_REG_PAIR fp, lr, #192! + + DCD 0xD50320FF ; xpaclri instruction in binary to avoid error while compiling with non-PAC enabled compilers EPILOG_RETURN NESTED_END #endif ; FEATURE_HIJACK +; void* PacStripPtr(void *); +; This function strips the pointer of PAC info that is passed as an argument. +; To avoid failing on non-PAC enabled machines, we use xpaclri (instead of xpaci) which strips lr explicitly. +; Thus we need to move the input into lr, strip it and copy it back to the result register.
+ LEAF_ENTRY PacStripPtr + mov x9, lr + mov lr, x0 + DCD 0xD50320FF ; xpaclri instruction in binary to avoid error while compiling with non-PAC enabled compilers + mov x0, lr + ret x9 + LEAF_END PacStripPtr + +; void* PacSignPtr(void *, void *); +; This function signs the input pointer (first argument) with the A key, using the second argument as the modifier. +; To avoid failing on non-PAC enabled machines, we use pacia1716 (instead of pacia) which signs x17 using x16 as the modifier. +; Thus we need to move the inputs into x17 and x16, sign x17 and then copy it back to the result register. + LEAF_ENTRY PacSignPtr + mov x17, x0 + mov x16, x1 + DCD 0xD503211F ; pacia1716 instruction in binary to avoid error while compiling with non-PAC enabled compilers + mov x0, x17 + ret + LEAF_END PacSignPtr + ;; ------------------------------------------------------------------ ;; Redirection Stub for GC in fully interruptible method GenerateRedirectedHandledJITCaseStub GCThreadControl diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index 1a8e0367bbb001..79536d371f7976 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -218,7 +218,8 @@ typedef struct _PROFILE_PLATFORM_SPECIFIC_DATA inline PCODE GetIP(const T_CONTEXT * context) { LIMITED_METHOD_DAC_CONTRACT; - return context->Pc; + //TODO-PAC: Strip/Authenticate while populating the context. + return (PCODE) context->Pc; } inline void SetIP(T_CONTEXT *context, PCODE eip) { diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index d285f2b1f184d4..b7176ae23cdcbf 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6909,6 +6909,82 @@ bool IsIPInEpilog(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, BOOL *pSaf return fIsInEpilog; } +#if defined(TARGET_ARM64) +// This function is used to check if Pointer Authentication (PAC) is enabled for this stack frame or not.
+bool IsPacPresent(EECodeInfo *pCodeInfo) +{ + _ASSERTE(pCodeInfo->IsValid()); + + // Lookup the function entry for the IP + PTR_RUNTIME_FUNCTION FunctionEntry = pCodeInfo->GetFunctionEntry(); + + // We should always get a function entry for a managed method + _ASSERTE(FunctionEntry != NULL); + DWORD_PTR ImageBase = pCodeInfo->GetModuleBase(); + + _ASSERTE((FunctionEntry->UnwindData & 3) == 0); // Packed unwind data are not used with managed code + ULONG_PTR UnwindDataPtr = (ULONG_PTR)(ImageBase + FunctionEntry->UnwindData); + + // Read the header word. For unwind info layout details refer https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling?view=msvc-170#arm64-exception-handling-information + DWORD HeaderWord = *(DWORD*)UnwindDataPtr; + UnwindDataPtr += 4; + + _ASSERTE(((HeaderWord >> 18) & 3) == 0); // Version 0 is the only supported version. + + ULONG UnwindWords = (HeaderWord >> 27) & 31; + ULONG EpilogScopeCount = (HeaderWord >> 22) & 31; + if (EpilogScopeCount == 0 && UnwindWords == 0) + { + EpilogScopeCount = *(DWORD*)UnwindDataPtr; + UnwindDataPtr += 4; + UnwindWords = (EpilogScopeCount >> 16) & 0xff; + EpilogScopeCount &= 0xffff; + } + + if ((HeaderWord & (1 << 21)) != 0) + { + EpilogScopeCount = 0; + } + + ULONG_PTR UnwindCodePtr = UnwindDataPtr + 4 * EpilogScopeCount; + ULONG_PTR UnwindCodesEndPtr = UnwindCodePtr + 4 * UnwindWords; + + while (UnwindCodePtr < UnwindCodesEndPtr) + { + ULONG CurCode = *(BYTE*)UnwindCodePtr; + if ((CurCode & 0xfe) == 0xe4) // The last unwind code + { + break; + } + + if (CurCode == 0xFC) // Unwind code for PAC (pac_sign_lr) + { + return true; + } + + if (CurCode < 0xC0) + { + UnwindCodePtr += 1; + } + else if (CurCode < 0xE0) + { + UnwindCodePtr += 2; + } + else + { + static const BYTE UnwindCodeSizeTable[32] = + { + 4,1,2,1,1,1,1,3, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 2,3,4,5,1,1,1,1 + }; + + UnwindCodePtr += UnwindCodeSizeTable[CurCode - 0xE0]; + } + } + + return false; +} +#endif // TARGET_ARM64 + #endif // 
FEATURE_HIJACK && (!TARGET_X86 || TARGET_UNIX) #define EXCEPTION_VISUALCPP_DEBUGGER ((DWORD) (1<<30 | 0x6D<<16 | 5000)) diff --git a/src/coreclr/vm/excep.h b/src/coreclr/vm/excep.h index 50361b6fd18e2c..66dd424b8d47e2 100644 --- a/src/coreclr/vm/excep.h +++ b/src/coreclr/vm/excep.h @@ -34,7 +34,9 @@ BOOL AdjustContextForJITHelpers(EXCEPTION_RECORD *pExceptionRecord, CONTEXT *pCo // General purpose functions for use on an IP in jitted code. bool IsIPInProlog(EECodeInfo *pCodeInfo); bool IsIPInEpilog(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, BOOL *pSafeToInjectThreadAbort); - +#if defined(TARGET_ARM64) +bool IsPacPresent(EECodeInfo *pCodeInfo); +#endif // TARGET_ARM64 #endif // FEATURE_HIJACK && (!TARGET_X86 || TARGET_UNIX) // Enums diff --git a/src/coreclr/vm/tailcallhelp.cpp b/src/coreclr/vm/tailcallhelp.cpp index da2dcca7d1da13..fa8ea22a9b4ad6 100644 --- a/src/coreclr/vm/tailcallhelp.cpp +++ b/src/coreclr/vm/tailcallhelp.cpp @@ -10,6 +10,9 @@ #include "gcrefmap.h" #include "threads.h" +#if defined(TARGET_ARM64) +extern "C" void* PacStripPtr(void* ptr); +#endif // TARGET_ARM64 FCIMPL0(void*, TailCallHelp::GetTailCallArgBuffer) { @@ -39,7 +42,14 @@ FCIMPL2(void*, TailCallHelp::GetTailCallInfo, void** retAddrSlot, void** retAddr Thread* thread = GetThread(); - *retAddr = thread->GetReturnAddress(retAddrSlot); + void* retAddrFromSlot = thread->GetReturnAddress(retAddrSlot); + +#if defined(TARGET_ARM64) + //TODO-PAC: Authenticate instead of stripping the return address. 
+ retAddrFromSlot = PacStripPtr(retAddrFromSlot); +#endif // TARGET_ARM64 + *retAddr = retAddrFromSlot; + return thread->GetTailCallTls(); } FCIMPLEND diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index 572c57fd9a08e3..f40903b6f05dd2 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -2563,9 +2563,10 @@ class Thread private: #ifdef FEATURE_HIJACK - void HijackThread(ExecutionState *esb X86_ARG(ReturnKind returnKind) X86_ARG(bool hasAsyncRet)); + void HijackThread(ExecutionState *esb X86_ARG(ReturnKind returnKind) X86_ARG(bool hasAsyncRet) ARM64_ARG(bool isPacEnabledFrame)); VOID *m_pvHJRetAddr; // original return address (before hijack) + VOID *m_pSp; // stack pointer of the frame being hijacked VOID **m_ppvHJRetAddrPtr; // place we bashed a new return address MethodDesc *m_HijackedFunction; // remember what we hijacked diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index e003ae328df79b..f38a0256dc2861 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -23,6 +23,10 @@ #define HIJACK_NONINTERRUPTIBLE_THREADS +#if defined(TARGET_ARM64) +extern "C" void* PacSignPtr(void* ptr, void* sp); +#endif // TARGET_ARM64 + bool ThreadSuspend::s_fSuspendRuntimeInProgress = false; bool ThreadSuspend::s_fSuspended = false; @@ -4528,6 +4532,7 @@ struct ExecutionState bool m_IsInterruptible; // is this code interruptible? 
MethodDesc *m_pFD; // current function/method we're executing VOID **m_ppvRetAddrPtr; // pointer to return address in frame + VOID *m_ppvSp; // pointer to stack pointer in frame DWORD m_RelOffset; // relative offset at which we're currently executing in this fcn IJitManager *m_pJitManager; METHODTOKEN m_MethodToken; @@ -4542,7 +4547,7 @@ struct ExecutionState }; // Client is responsible for suspending the thread before calling -void Thread::HijackThread(ExecutionState *esb X86_ARG(ReturnKind returnKind) X86_ARG(bool hasAsyncRet)) +void Thread::HijackThread(ExecutionState *esb X86_ARG(ReturnKind returnKind) X86_ARG(bool hasAsyncRet) ARM64_ARG(bool isPacEnabledFrame)) { CONTRACTL { NOTHROW; @@ -4598,6 +4603,7 @@ void Thread::HijackThread(ExecutionState *esb X86_ARG(ReturnKind returnKind) X86 // Remember the place that the return would have gone m_pvHJRetAddr = *esb->m_ppvRetAddrPtr; + m_pSp = esb->m_ppvSp; IS_VALID_CODE_PTR((FARPROC) (TADDR)m_pvHJRetAddr); // TODO [DAVBR]: For the full fix for VsWhidbey 450273, the below @@ -4610,6 +4616,13 @@ void Thread::HijackThread(ExecutionState *esb X86_ARG(ReturnKind returnKind) X86 m_HijackedFunction = esb->m_pFD; // Bash the stack to return to one of our stubs +#if defined(TARGET_ARM64) + if (isPacEnabledFrame) + { + pvHijackAddr = PacSignPtr(pvHijackAddr, m_pSp); + } +#endif // TARGET_ARM64 + *esb->m_ppvRetAddrPtr = pvHijackAddr; SetThreadState(TS_Hijacked); } @@ -4689,6 +4702,7 @@ StackWalkAction SWCB_GetExecutionState(CrawlFrame *pCF, VOID *pData) pES->m_pFD = pCF->GetFunction(); pES->m_MethodToken = pCF->GetMethodToken(); pES->m_ppvRetAddrPtr = 0; + pES->m_ppvSp = (void *) GetRegdisplaySP(pCF->GetRegisterSet()); pES->m_IsInterruptible = pCF->IsGcSafe(); pES->m_RelOffset = pCF->GetRelOffset(); pES->m_pJitManager = pCF->GetJitManager(); @@ -5316,9 +5330,13 @@ BOOL Thread::HandledJITCase() X86_ONLY(ReturnKind returnKind;) X86_ONLY(bool hasAsyncRet;) + ARM64_ONLY(bool isPacEnabledFrame;) if 
(GetReturnAddressHijackInfo(&codeInfo X86_ARG(&returnKind) X86_ARG(&hasAsyncRet))) { - HijackThread(&esb X86_ARG(returnKind) X86_ARG(hasAsyncRet)); +#ifdef TARGET_ARM64 + isPacEnabledFrame = IsPacPresent(&codeInfo); +#endif + HijackThread(&esb X86_ARG(returnKind) X86_ARG(hasAsyncRet) ARM64_ARG(isPacEnabledFrame)); } } } @@ -5870,7 +5888,11 @@ void HandleSuspensionForInterruptedThread(CONTEXT *interruptedContext) StackWalkerWalkingThreadHolder threadStackWalking(pThread); // Hijack the return address to point to the appropriate routine based on the method's return type. - pThread->HijackThread(&executionState X86_ARG(returnKind) X86_ARG(hasAsyncRet)); + ARM64_ONLY(bool isPacEnabledFrame); +#ifdef TARGET_ARM64 + isPacEnabledFrame = IsPacPresent(&codeInfo); +#endif + pThread->HijackThread(&executionState X86_ARG(returnKind) X86_ARG(hasAsyncRet) ARM64_ARG(isPacEnabledFrame)); } } diff --git a/src/native/external/llvm-libunwind/src/DwarfInstructions.hpp b/src/native/external/llvm-libunwind/src/DwarfInstructions.hpp index 32112839042b58..cdc8225538470d 100644 --- a/src/native/external/llvm-libunwind/src/DwarfInstructions.hpp +++ b/src/native/external/llvm-libunwind/src/DwarfInstructions.hpp @@ -354,7 +354,18 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, if (cieInfo.addressesSignedWithBKey) asm("hint 0xe" : "+r"(x17) : "r"(x16)); // autib1716 else - asm("hint 0xc" : "+r"(x17) : "r"(x16)); // autia1716 + { + //TODO-PAC: Restore the authentication with A key when signing with SP is in place. 
+ //asm("hint 0xc" : "+r"(x17) : "r"(x16)); // autia1716 + __asm__ __volatile__ ("mov x9, lr\n\t" + "mov lr, %0\n\t" + "xpaclri\n\t" + "mov %0, lr\n\t" + "mov lr, x9" + : "+r"(x17) + : + : "x9", "lr"); // strip PAC + } } returnAddress = x17; #endif From 584a268ebbd978791dd9d24d473ae314494c998b Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Wed, 11 Mar 2026 17:32:43 +0000 Subject: [PATCH 02/65] Fix build failures --- src/coreclr/jit/emitarm64.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index e94ef9f1e720e9..0fea001b786b54 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1441,7 +1441,7 @@ void emitter::emitPacInProlog() return; } emitIns(INS_paciasp); - emitComp->unwindPacSignLR(); + m_compiler->unwindPacSignLR(); } //------------------------------------------------------------------------ @@ -1454,7 +1454,7 @@ void emitter::emitPacInEpilog() return; } emitIns(INS_autiasp); - emitComp->unwindPacSignLR(); + m_compiler->unwindPacSignLR(); } //------------------------------------------------------------------------ From 74a52939c11fac1c8decbea616a2c047fa42e10f Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 12 Mar 2026 10:12:30 +0000 Subject: [PATCH 03/65] Fix build errors --- src/coreclr/jit/codegenarm64.cpp | 2 +- src/coreclr/vm/amd64/asmconstants.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index de59099b03df9d..0b3a3be686f869 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -1640,7 +1640,7 @@ void CodeGen::genFuncletEpilog() } GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, imm); - m_compiler->unwindSaveRegPair(REG_FP, REG_LR, imm); + m_compiler->unwindSaveRegPair(REG_FP, REG_LR, static_cast(imm)); GetEmitter()->emitPacInEpilog(); 
genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, tempReg, nullptr, /* reportUnwindData */ true); } diff --git a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h index 5a1dd76dd21240..0e5b4525e5496d 100644 --- a/src/coreclr/vm/amd64/asmconstants.h +++ b/src/coreclr/vm/amd64/asmconstants.h @@ -570,7 +570,7 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__InterpMethod__pCallStub == offsetof(InterpMethod #ifdef TARGET_UNIX #ifdef _DEBUG -#define OFFSETOF__Thread__m_pInterpThreadContext 0xaf8 +#define OFFSETOF__Thread__m_pInterpThreadContext 0xb00 #else // _DEBUG #define OFFSETOF__Thread__m_pInterpThreadContext 0x290 #endif // _DEBUG From c9c43c95299764a4a092bb1820389b65269404e3 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 12 Mar 2026 12:20:26 +0000 Subject: [PATCH 04/65] Limit thread context changes to Arm64 --- src/coreclr/vm/amd64/asmconstants.h | 2 +- src/coreclr/vm/threads.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h index 0e5b4525e5496d..5a1dd76dd21240 100644 --- a/src/coreclr/vm/amd64/asmconstants.h +++ b/src/coreclr/vm/amd64/asmconstants.h @@ -570,7 +570,7 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__InterpMethod__pCallStub == offsetof(InterpMethod #ifdef TARGET_UNIX #ifdef _DEBUG -#define OFFSETOF__Thread__m_pInterpThreadContext 0xb00 +#define OFFSETOF__Thread__m_pInterpThreadContext 0xaf8 #else // _DEBUG #define OFFSETOF__Thread__m_pInterpThreadContext 0x290 #endif // _DEBUG diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index baeed02b3cd479..6e05eb897d52ee 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -2513,7 +2513,9 @@ class Thread void HijackThread(ExecutionState *esb X86_ARG(ReturnKind returnKind) X86_ARG(bool hasAsyncRet) ARM64_ARG(bool isPacEnabledFrame)); VOID *m_pvHJRetAddr; // original return address (before hijack) +#ifdef TARGET_ARM64 VOID *m_pSp; // stack pointer of the frame being 
hijacked +#endif VOID **m_ppvHJRetAddrPtr; // place we bashed a new return address MethodDesc *m_HijackedFunction; // remember what we hijacked From ce5d79bb26b7247a818d3c84b33de47764c3fed9 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 12 Mar 2026 13:26:48 +0000 Subject: [PATCH 05/65] Fix build issues on x86 --- src/coreclr/vm/threadsuspend.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index fe5a657090cb64..3d17756a19e710 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -4545,7 +4545,10 @@ void Thread::HijackThread(ExecutionState *esb X86_ARG(ReturnKind returnKind) X86 // Remember the place that the return would have gone m_pvHJRetAddr = *esb->m_ppvRetAddrPtr; + +#if defined(TARGET_ARM64) m_pSp = esb->m_ppvSp; +#endif // TARGET_ARM64 IS_VALID_CODE_PTR((FARPROC) (TADDR)m_pvHJRetAddr); // TODO [DAVBR]: For the full fix for VsWhidbey 450273, the below From 867e149bb4b6ef3f802cf9943631da1c24434412 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Wed, 18 Mar 2026 11:43:59 +0000 Subject: [PATCH 06/65] Limit framelayout changes to JIT --- src/coreclr/jit/codegenarm64.cpp | 63 +++++++++++++++++------- src/coreclr/jit/codegenarmarch.cpp | 17 +++++-- src/coreclr/nativeaot/Runtime/thread.cpp | 2 +- 3 files changed, 61 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 0b3a3be686f869..b0c234901e87bb 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -213,7 +213,7 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) { case 1: { - if (JitConfig.JitPacEnabled() != 0) + if (JitConfig.JitPacEnabled() != 0 && !m_compiler->IsAot()) { // Generate: // ldp fp,lr,[sp] @@ -245,7 +245,10 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, 
REG_SPBASE, m_compiler->lvaOutgoingArgSpaceSize); m_compiler->unwindSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize); - GetEmitter()->emitPacInEpilog(); + if (!m_compiler->IsAot()) + { + GetEmitter()->emitPacInEpilog(); + } GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); m_compiler->unwindAllocStack(totalFrameSize); break; @@ -289,6 +292,11 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spAdjust); m_compiler->unwindAllocStack(spAdjust); } + + if (m_compiler->IsAot()) + { + GetEmitter()->emitPacInEpilog(); + } } //------------------------------------------------------------------------ @@ -501,7 +509,7 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, { // We can use pre-indexed addressing when pointer authentication PAC is disabled. assert(reg1 != REG_LR); - if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR)) + if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR) && !m_compiler->IsAot()) { // Generate: // sub SP, SP, #spDelta @@ -521,7 +529,6 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_PRE_INDEX); m_compiler->unwindSaveRegPairPreindexed(reg1, reg2, spDelta); } - needToSaveRegs = false; } else // (spOffset != 0) || (spDelta < -512) @@ -540,9 +547,9 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, // 64-bit STP offset range: -512 to 504, multiple of 8. 
assert(spOffset <= 504); assert((spOffset % 8) == 0); - assert(reg1 != REG_LR); - if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR)) + + if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR) && !m_compiler->IsAot()) { assert(reg1 == REG_FP); GetEmitter()->emitPacInProlog(); @@ -662,7 +669,7 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, assert(!useSaveNextPair); if ((spOffset == 0) && (spDelta <= 504)) { - if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR)) + if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR) && !m_compiler->IsAot()) { // We separate SP increment and loading FP/LR when PAC is enabled. assert(reg1 == REG_FP); @@ -688,7 +695,7 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); m_compiler->unwindSaveRegPair(reg1, reg2, spOffset); - if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR)) + if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR) && !m_compiler->IsAot()) { assert(reg1 == REG_FP); GetEmitter()->emitPacInEpilog(); @@ -702,7 +709,7 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, { GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); - if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR)) + if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR) && !m_compiler->IsAot()) { assert(reg1 == REG_FP); GetEmitter()->emitPacInEpilog(); @@ -1411,6 +1418,11 @@ void CodeGen::genFuncletProlog(BasicBlock* block) m_compiler->unwindBegProlog(); + if (m_compiler->IsAot()) + { + GetEmitter()->emitPacInProlog(); + } + regMaskTP maskSaveRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; regMaskTP maskSaveRegsInt = genFuncletInfo.fiSaveRegs & ~maskSaveRegsFloat; @@ -1443,13 +1455,16 @@ void CodeGen::genFuncletProlog(BasicBlock* block) bool scratchRegIsZero = false; genAllocLclFrame(-genFuncletInfo.fiSpDelta1, REG_SCRATCH, &scratchRegIsZero, maskArgRegsLiveIn); 
genStackPointerAdjustment(genFuncletInfo.fiSpDelta1, REG_SCRATCH, nullptr, /* reportUnwindData */ true); - GetEmitter()->emitPacInProlog(); + if (!m_compiler->IsAot()) + { + GetEmitter()->emitPacInProlog(); + } GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); m_compiler->unwindSaveRegPair(REG_FP, REG_LR, 0); } else { - if (JitConfig.JitPacEnabled() != 0) + if (JitConfig.JitPacEnabled() != 0 && !m_compiler->IsAot()) { // generate sub SP,SP,imm genStackPointerAdjustment(genFuncletInfo.fiSpDelta1, REG_NA, nullptr, /* reportUnwindData */ true); @@ -1481,7 +1496,10 @@ void CodeGen::genFuncletProlog(BasicBlock* block) assert(genFuncletInfo.fiSpDelta2 == 0); - GetEmitter()->emitPacInProlog(); + if (!m_compiler->IsAot()) + { + GetEmitter()->emitPacInProlog(); + } GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSP_to_FPLR_save_delta); m_compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta); @@ -1491,7 +1509,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) else if (genFuncletInfo.fiFrameType == 3) { // Avoid using pre-indexed store when PAC is enabled. - if ((m_compiler->opts.IsOSR()) || (JitConfig.JitPacEnabled() != 0)) + if ((m_compiler->opts.IsOSR()) || ((JitConfig.JitPacEnabled() != 0) && !m_compiler->IsAot())) { // With OSR we may see large values for fiSpDelta1 // We repurpose genAllocLclFram to do the necessary probing. 
@@ -1628,7 +1646,7 @@ void CodeGen::genFuncletEpilog() { // With OSR we may see large values for fiSpDelta1 // - if (m_compiler->opts.IsOSR() || (JitConfig.JitPacEnabled() != 0)) + if (m_compiler->opts.IsOSR() || ((JitConfig.JitPacEnabled() != 0) && !m_compiler->IsAot())) { ssize_t imm = 0; regNumber tempReg = REG_SCRATCH; @@ -1641,7 +1659,10 @@ void CodeGen::genFuncletEpilog() GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, imm); m_compiler->unwindSaveRegPair(REG_FP, REG_LR, static_cast(imm)); - GetEmitter()->emitPacInEpilog(); + if(!m_compiler->IsAot()) + { + GetEmitter()->emitPacInEpilog(); + } genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, tempReg, nullptr, /* reportUnwindData */ true); } else @@ -1659,7 +1680,10 @@ void CodeGen::genFuncletEpilog() GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSP_to_FPLR_save_delta); m_compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta); - GetEmitter()->emitPacInEpilog(); + if(!m_compiler->IsAot()) + { + GetEmitter()->emitPacInEpilog(); + } // fiFrameType==2 constraints: assert(genFuncletInfo.fiSpDelta1 < 0); @@ -1674,7 +1698,7 @@ void CodeGen::genFuncletEpilog() { // With OSR we may see large values for fiSpDelta1 // Avoid post-indexed load when PAC is enabled. 
- if (m_compiler->opts.IsOSR() || (JitConfig.JitPacEnabled() != 0)) + if (m_compiler->opts.IsOSR() || ((JitConfig.JitPacEnabled() != 0) && !m_compiler->IsAot())) { GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); m_compiler->unwindSaveRegPair(REG_FP, REG_LR, 0); @@ -1721,6 +1745,11 @@ void CodeGen::genFuncletEpilog() } } + if (m_compiler->IsAot()) + { + GetEmitter()->emitPacInEpilog(); + } + inst_RV(INS_ret, REG_LR, TYP_I_IMPL); m_compiler->unwindReturn(REG_LR); diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 59749ad4c2e692..bad09918b6cc66 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -4486,6 +4486,11 @@ void CodeGen::genPushCalleeSavedRegisters() } #endif // DEBUG + if (m_compiler->IsAot()) + { + GetEmitter()->emitPacInProlog(); + } + // The frameType number is arbitrary, is defined below, and corresponds to one of the frame styles we // generate based on various sizes. int frameType = 0; @@ -4543,7 +4548,7 @@ void CodeGen::genPushCalleeSavedRegisters() assert(totalFrameSize <= STACK_PROBE_BOUNDARY_THRESHOLD_BYTES); - if (JitConfig.JitPacEnabled() != 0) + if (JitConfig.JitPacEnabled() != 0 && !m_compiler->IsAot()) { // Avoid pre-indexed variant of store to save FP/LR when PAC is enabled. 
genStackPointerAdjustment(-totalFrameSize, REG_SCRATCH, nullptr, /* reportUnwindData */ true); @@ -4607,7 +4612,10 @@ void CodeGen::genPushCalleeSavedRegisters() assert(m_compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize); - GetEmitter()->emitPacInProlog(); + if (!m_compiler->IsAot()) + { + GetEmitter()->emitPacInProlog(); + } GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, m_compiler->lvaOutgoingArgSpaceSize); m_compiler->unwindSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize); @@ -4722,7 +4730,10 @@ void CodeGen::genPushCalleeSavedRegisters() } else { - GetEmitter()->emitPacInProlog(); + if (!m_compiler->IsAot()) + { + GetEmitter()->emitPacInProlog(); + } // No frame pointer (no chaining). assert((maskSaveRegsInt & RBM_FP) == 0); assert((maskSaveRegsInt & RBM_LR) != 0); diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index be48226f9de182..d420813a833ab1 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -832,7 +832,7 @@ void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, Hijack #if defined(TARGET_ARM64) if (frameIterator->GetCodeManager()->IsPacPresent(frameIterator->GetMethodInfo(), frameIterator->GetRegisterSet())) { - pvHijackedAddr = PacSignPtr(pvHijackedAddr, (void*)frameIterator->GetRegisterSet()->pFP); + pvHijackedAddr = PacSignPtr(pvHijackedAddr, (void*)frameIterator->GetRegisterSet()->GetSP()); } #endif // TARGET_ARM64 *ppvRetAddrLocation = pvHijackedAddr; From f00f78949f9298301461859dff81bb30dbf85fbf Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Wed, 18 Mar 2026 12:03:21 +0000 Subject: [PATCH 07/65] Fix formatting --- src/coreclr/jit/codegenarm64.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index b0c234901e87bb..edbe730c135b73 100644 --- 
a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -1659,7 +1659,7 @@ void CodeGen::genFuncletEpilog() GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, imm); m_compiler->unwindSaveRegPair(REG_FP, REG_LR, static_cast(imm)); - if(!m_compiler->IsAot()) + if (!m_compiler->IsAot()) { GetEmitter()->emitPacInEpilog(); } @@ -1680,7 +1680,7 @@ void CodeGen::genFuncletEpilog() GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSP_to_FPLR_save_delta); m_compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta); - if(!m_compiler->IsAot()) + if (!m_compiler->IsAot()) { GetEmitter()->emitPacInEpilog(); } From 4582f7326c4002b15e6feb2ed675d822427e11ea Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Tue, 31 Mar 2026 14:54:03 +0100 Subject: [PATCH 08/65] Add MethodAssociatedData containing hijackinfo for NativeAOT --- src/coreclr/nativeaot/Runtime/ICodeManager.h | 4 +- src/coreclr/nativeaot/Runtime/thread.cpp | 7 +- .../Runtime/unix/UnixNativeCodeManager.cpp | 108 ++++++++++-- .../Runtime/unix/UnixNativeCodeManager.h | 5 +- .../Runtime/windows/CoffNativeCodeManager.cpp | 6 +- .../Runtime/windows/CoffNativeCodeManager.h | 3 +- .../DependencyAnalysis/INodeWithCodeInfo.cs | 16 +- .../tools/Common/JitInterface/CorInfoImpl.cs | 161 ++++++++++++++---- .../MethodAssociatedDataNode.cs | 12 ++ .../DependencyAnalysis/MethodCodeNode.cs | 15 +- 10 files changed, 272 insertions(+), 65 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/ICodeManager.h b/src/coreclr/nativeaot/Runtime/ICodeManager.h index 91b7e3c2b16ba1..fcd4b3792cf1db 100644 --- a/src/coreclr/nativeaot/Runtime/ICodeManager.h +++ b/src/coreclr/nativeaot/Runtime/ICodeManager.h @@ -115,6 +115,7 @@ enum class AssociatedDataFlags : unsigned char { None = 0, HasUnboxingStubTarget = 1, + HasArm64PacHijackData = 2, }; enum UnwindStackFrameFlags @@ -168,7 +169,8 @@ class ICodeManager virtual bool 
GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in - PTR_PTR_VOID * ppvRetAddrLocation // out + PTR_PTR_VOID * ppvRetAddrLocation, // out + uintptr_t * pSpForArm64PacSign // out ) PURE_VIRTUAL #ifdef TARGET_X86 diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index 417e177db1d6a7..44cc5c0f5ce33f 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -800,11 +800,14 @@ void Thread::HijackReturnAddress(NATIVE_CONTEXT* pSuspendCtx, HijackFunc* pfnHij void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, HijackFunc* pfnHijackFunction) { void** ppvRetAddrLocation; + uintptr_t spForPacSign = 0; frameIterator->CalculateCurrentMethodState(); + if (frameIterator->GetCodeManager()->GetReturnAddressHijackInfo(frameIterator->GetMethodInfo(), frameIterator->GetRegisterSet(), - &ppvRetAddrLocation)) + &ppvRetAddrLocation, + &spForPacSign)) { ASSERT(ppvRetAddrLocation != NULL); @@ -838,7 +841,7 @@ void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, Hijack #if defined(TARGET_ARM64) if (frameIterator->GetCodeManager()->IsPacPresent(frameIterator->GetMethodInfo(), frameIterator->GetRegisterSet())) { - pvHijackedAddr = PacSignPtr(pvHijackedAddr, (void*)frameIterator->GetRegisterSet()->GetSP()); + pvHijackedAddr = PacSignPtr(pvHijackedAddr, (void*)spForPacSign); } #endif // TARGET_ARM64 *ppvRetAddrLocation = pvHijackedAddr; diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index ea0f6ca0b22746..e300e3221c9bf3 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -64,6 +64,19 @@ UnixNativeCodeManager::~UnixNativeCodeManager() { } +static PTR_uint8_t GetAssociatedData(PTR_uint8_t pLSDA) +{ + uint8_t unwindBlockFlags = *pLSDA++; + + if 
((unwindBlockFlags & UBF_FUNC_KIND_MASK) != UBF_FUNC_KIND_ROOT) + pLSDA += sizeof(int32_t); + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) == 0) + return NULL; + + return pLSDA + *dac_cast(pLSDA); +} + #if defined(TARGET_ARM64) static size_t readULEB(const uint8_t *&p, const uint8_t *end) { @@ -79,6 +92,26 @@ static size_t readULEB(const uint8_t *&p, const uint8_t *end) return result; } +static bool TryGetSpForPacSigning(UnixNativeMethodInfo *pNativeMethodInfo, + PTR_PTR_VOID ppvRetAddrLocation, + uintptr_t *pSpForArm64PacSign) +{ + PTR_uint8_t pAssociatedData = GetAssociatedData(pNativeMethodInfo->pMainLSDA); + if (pAssociatedData == NULL || ppvRetAddrLocation == NULL) + return false; + + AssociatedDataFlags flags = (AssociatedDataFlags)(*pAssociatedData++); + if ((static_cast(flags) & static_cast(AssociatedDataFlags::HasArm64PacHijackData)) == 0) + return false; + + if ((static_cast(flags) & static_cast(AssociatedDataFlags::HasUnboxingStubTarget)) != 0) + pAssociatedData += sizeof(int32_t); + + uint32_t retAddrLocationToEntrySpDelta = *dac_cast(pAssociatedData); + *pSpForArm64PacSign = dac_cast(ppvRetAddrLocation) + retAddrLocationToEntrySpDelta; + return true; +} + bool UnixNativeCodeManager::IsPacPresent(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet) { @@ -459,7 +492,7 @@ bool UnixNativeCodeManager::IsUnwindable(PTR_VOID pvAddress) pMethodInfo = &methodInfo; #endif -#if (defined(TARGET_APPLE) && defined(TARGET_ARM64)) || defined(TARGET_ARM) +#if defined(TARGET_ARM64) || defined(TARGET_ARM) // VirtualUnwind can't unwind epilogues and some prologues. 
return TrailingEpilogueInstructionsCount(pMethodInfo, pvAddress) == 0 && IsInProlog(pMethodInfo, pvAddress) != 1; #else @@ -579,7 +612,7 @@ static bool IsArmPrologInstruction(uint16_t* pInstr) #endif -#if (defined(TARGET_APPLE) && defined(TARGET_ARM64)) || defined(TARGET_ARM) +#if defined(TARGET_ARM64) || defined(TARGET_ARM) // checks for known prolog instructions generated by ILC and returns // 1 - in prolog // 0 - not in prolog @@ -948,6 +981,16 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho #define LDP_BITS2 0x28400000 #define LDP_MASK2 0x7E400000 +// add sp, sp, #imm +// 1001 0001 0xxx xxxx xxxx xx11 1111 1111 +#define ADD_SP_SP_BITS 0x910003FF +#define ADD_SP_SP_MASK 0xFF8003FF + +// sub sp, fp, #imm +// 1101 0001 0xxx xxxx xxxx xx11 1011 1111 +#define SUB_SP_FP_BITS 0xD10003BF +#define SUB_SP_FP_MASK 0xFF8003FF + // Branches, Exception Generating and System instruction group // xxx1 01xx xxxx xxxx xxxx xxxx xxxx xxxx #define BEGS_BITS 0x14000000 @@ -1002,6 +1045,14 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho return -1; } } + + // Stack pointer adjustments can happen before AUTIASP/RET in some epilog layouts, + // so treat them as being in the epilog as well. 
+ if ((instr & ADD_SP_SP_MASK) == ADD_SP_SP_BITS || + (instr & SUB_SP_FP_MASK) == SUB_SP_FP_BITS) + { + return -1; + } } #elif defined(TARGET_ARM) @@ -1225,7 +1276,8 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in - PTR_PTR_VOID * ppvRetAddrLocation) // out + PTR_PTR_VOID * ppvRetAddrLocation, // out + uintptr_t * pSpForArm64PacSign) // out { UnixNativeMethodInfo* pNativeMethodInfo = (UnixNativeMethodInfo*)pMethodInfo; @@ -1242,6 +1294,20 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn if ((unwindBlockFlags & UBF_FUNC_REVERSE_PINVOKE) != 0) return false; +#if defined(TARGET_ARM64) + bool pacPresent = IsPacPresent(pMethodInfo, pRegisterSet); + if (pacPresent) + { + // For PAC frames we only hijack locations where the current frame state is + // unambiguous. Partial prologs can save FP/LR before FP is established, and some + // epilog layouts adjust SP before the final AUTIASP/RET sequence. + if (IsInProlog(pMethodInfo, (PTR_VOID)pRegisterSet->IP) == 1) + { + return false; + } + } +#endif + #if defined(TARGET_ARM) // Ensure that PC doesn't have the Thumb bit set. Prolog and epilog // checks depend on it. 
@@ -1254,9 +1320,22 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn // can't figure, possibly a breakpoint instruction return false; } - else if (epilogueInstructions > 0) + +#if defined(TARGET_ARM64) + if (pacPresent && epilogueInstructions != 0) + { + return false; + } +#endif + + if (epilogueInstructions > 0) { + *pSpForArm64PacSign = 0; *ppvRetAddrLocation = (PTR_PTR_VOID)(pRegisterSet->GetSP() + (sizeof(TADDR) * (epilogueInstructions - 1))); +#if defined(TARGET_ARM64) + if (IsPacPresent(pMethodInfo, pRegisterSet) && !TryGetSpForPacSigning(pNativeMethodInfo, *ppvRetAddrLocation, pSpForArm64PacSign)) + return false; +#endif return true; } @@ -1279,6 +1358,7 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn // Unwind the current method context to the caller's context to get its stack pointer // and obtain the location of the return address on the stack #if defined(TARGET_AMD64) + *pSpForArm64PacSign = 0; if (!VirtualUnwind(pMethodInfo, pRegisterSet)) { @@ -1289,6 +1369,7 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn return true; #elif defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + *pSpForArm64PacSign = 0; if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) p += sizeof(int32_t); @@ -1331,6 +1412,13 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn *ppvRetAddrLocation = (PTR_PTR_VOID)pRegisterSet->GetReturnAddressRegisterLocation(); +#if defined(TARGET_ARM64) + if (IsPacPresent(pMethodInfo, pRegisterSet) && !TryGetSpForPacSigning(pNativeMethodInfo, *ppvRetAddrLocation, pSpForArm64PacSign)) + { + return false; + } +#endif + return true; #else return false; @@ -1482,17 +1570,7 @@ PTR_VOID UnixNativeCodeManager::GetAssociatedData(PTR_VOID ControlPC) if (!FindMethodInfo(ControlPC, (MethodInfo*)&methodInfo)) return NULL; - PTR_uint8_t p = methodInfo.pLSDA; - - uint8_t 
unwindBlockFlags = *p++; - - if ((unwindBlockFlags & UBF_FUNC_KIND_MASK) != UBF_FUNC_KIND_ROOT) - p += sizeof(uint32_t); - - if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) == 0) - return NULL; - - return dac_cast(p + *dac_cast(p)); + return dac_cast(::GetAssociatedData(methodInfo.pLSDA)); } extern "C" void RegisterCodeManager(ICodeManager * pCodeManager, PTR_VOID pvStartRange, uint32_t cbRange); diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h index be455860b3277e..aa640e7f604b39 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h @@ -63,7 +63,7 @@ class UnixNativeCodeManager : public ICodeManager bool IsUnwindable(PTR_VOID pvAddress); -#if (defined(TARGET_APPLE) && defined(TARGET_ARM64)) || defined(TARGET_ARM) +#if defined(TARGET_ARM64) || defined(TARGET_ARM) int IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddress); #endif @@ -71,7 +71,8 @@ class UnixNativeCodeManager : public ICodeManager bool GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in - PTR_PTR_VOID * ppvRetAddrLocation); // out + PTR_PTR_VOID * ppvRetAddrLocation, // out + uintptr_t * pSpForArm64PacSign);// out #if defined(TARGET_ARM64) bool IsPacPresent(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet); diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp index f358003f0f6c3e..b801039f1eb2d5 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -881,7 +881,8 @@ bool CoffNativeCodeManager::IsPacPresent(MethodInfo * pMethodInfo, bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in - PTR_PTR_VOID * ppvRetAddrLocation) // out + PTR_PTR_VOID * 
ppvRetAddrLocation, // out + uintptr_t * pSpForArm64PacSign) // out { CoffNativeMethodInfo * pNativeMethodInfo = (CoffNativeMethodInfo *)pMethodInfo; @@ -912,6 +913,7 @@ bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn #endif #if defined(TARGET_AMD64) + *pSpForArm64PacSign = 0; context.Rsp = pRegisterSet->GetSP(); context.Rbp = pRegisterSet->GetFP(); context.Rip = pRegisterSet->GetIP(); @@ -928,6 +930,7 @@ bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn *ppvRetAddrLocation = (PTR_PTR_VOID)(context.Rsp - sizeof (PVOID)); return true; #elif defined(TARGET_ARM64) + *pSpForArm64PacSign = pRegisterSet->GetSP(); if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) p += sizeof(int32_t); @@ -985,6 +988,7 @@ bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn *ppvRetAddrLocation = (PTR_PTR_VOID)contextPointers.Lr; return true; #else + *pSpForArm64PacSign = 0; EstablisherFrame = 0; HandlerData = NULL; return false; diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h index 0f54e591f95298..d85735979b4681 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h @@ -94,7 +94,8 @@ class CoffNativeCodeManager : public ICodeManager bool GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in - PTR_PTR_VOID * ppvRetAddrLocation); // out + PTR_PTR_VOID * ppvRetAddrLocation, // out + uintptr_t * pSpForArm64PacSign); // out #ifdef TARGET_X86 GCRefKind GetReturnValueKind(MethodInfo * pMethodInfo, diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/INodeWithCodeInfo.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/INodeWithCodeInfo.cs index c216ff2c57d632..3744e2c172417c 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/INodeWithCodeInfo.cs +++ 
b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/INodeWithCodeInfo.cs @@ -8,12 +8,12 @@ namespace ILCompiler.DependencyAnalysis [Flags] public enum FrameInfoFlags { - Handler = 0x01, - Filter = 0x02, + Handler = 0x01, + Filter = 0x02, - HasEHInfo = 0x04, - ReversePInvoke = 0x08, - HasAssociatedData = 0x10, + HasEHInfo = 0x04, + ReversePInvoke = 0x08, + HasAssociatedData = 0x10, } public struct FrameInfo : IEquatable @@ -65,4 +65,10 @@ public DebugEHClauseInfo(uint tryOffset, uint tryLength, uint handlerOffset, uin HandlerLength = handlerLength; } } + + public interface IArm64PacHijackInfoNode + { + bool HasPacHijackInfo { get; } + uint PacRetAddrLocationToEntrySpDelta { get; } + } } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs index b4fde174ed8f6d..b55e5325d6f7f8 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs @@ -117,7 +117,7 @@ public LikelyClassMethodRecord(IntPtr handle, uint likelihood) } [DllImport(JitLibrary)] - private static extern uint getLikelyClasses(LikelyClassMethodRecord* pLikelyClasses, uint maxLikelyClasses, PgoInstrumentationSchema* schema, uint countSchemaItems, byte*pInstrumentationData, int ilOffset); + private static extern uint getLikelyClasses(LikelyClassMethodRecord* pLikelyClasses, uint maxLikelyClasses, PgoInstrumentationSchema* schema, uint countSchemaItems, byte* pInstrumentationData, int ilOffset); [DllImport(JitLibrary)] private static extern uint getLikelyMethods(LikelyClassMethodRecord* pLikelyMethods, uint maxLikelyMethods, PgoInstrumentationSchema* schema, uint countSchemaItems, byte* pInstrumentationData, int ilOffset); @@ -141,7 +141,7 @@ private static extern CorJitResult JitCompileMethod(out IntPtr exception, ref CORINFO_METHOD_INFO info, uint flags, out IntPtr nativeEntry, out uint codeSize); [DllImport(JitSupportLibrary)] - private static extern IntPtr 
AllocException([MarshalAs(UnmanagedType.LPWStr)]string message, int messageLength); + private static extern IntPtr AllocException([MarshalAs(UnmanagedType.LPWStr)] string message, int messageLength); [DllImport(JitSupportLibrary)] private static extern void JitSetOs(IntPtr jit, CORINFO_OS os); @@ -440,6 +440,89 @@ private CompilationResult CompileMethodInternal(IMethodNode methodCodeNodeNeedin partial void DetermineIfCompilationShouldBeRetried(ref CompilationResult result); +#if !READYTORUN + private const short Arm64DwarfLrRegister = 30; + + private bool TryGetUnixPacRetAddrLocationToEntrySpDelta(FrameInfo[] frameInfos, out uint retAddrLocationToEntrySpDelta) + { + retAddrLocationToEntrySpDelta = 0; + + var target = _compilation.TypeSystemContext.Target; + if (target.Architecture != TargetArchitecture.ARM64 || target.OperatingSystem != TargetOS.Linux) + { + return false; + } + + if (frameInfos == null) + { + return false; + } + + foreach (FrameInfo frameInfo in frameInfos) + { + if ((frameInfo.Flags & (FrameInfoFlags.Handler | FrameInfoFlags.Filter)) != 0) + { + continue; + } + + byte[] blobData = frameInfo.BlobData; + if (blobData == null || blobData.Length == 0 || (blobData.Length % 8) != 0) + { + return false; + } + + short cfaRegister = -1; + int cfaOffset = 0; + int lrOffset = int.MinValue; + bool hasPac = false; + + for (int offset = 0; offset < blobData.Length; offset += 8) + { + CFI_OPCODE opcode = (CFI_OPCODE)blobData[offset + 1]; + short dwarfReg = BitConverter.ToInt16(blobData, offset + 2); + int cfiOffset = BitConverter.ToInt32(blobData, offset + 4); + + switch (opcode) + { + case CFI_OPCODE.CFI_DEF_CFA: + cfaRegister = dwarfReg; + cfaOffset = cfiOffset; + break; + + case CFI_OPCODE.CFI_DEF_CFA_REGISTER: + cfaRegister = dwarfReg; + break; + + case CFI_OPCODE.CFI_ADJUST_CFA_OFFSET: + cfaOffset += cfiOffset; + break; + + case CFI_OPCODE.CFI_REL_OFFSET: + if (dwarfReg == Arm64DwarfLrRegister) + { + lrOffset = cfiOffset; + } + break; + + case 
CFI_OPCODE.CFI_NEGATE_RA_STATE: + hasPac = true; + break; + } + } + + if (!hasPac || cfaRegister < 0 || cfaOffset < 0 || lrOffset == int.MinValue || cfaOffset < lrOffset) + { + return false; + } + + retAddrLocationToEntrySpDelta = checked((uint)(cfaOffset - lrOffset)); + return true; + } + + return false; + } +#endif + private void PublishCode() { var relocs = _codeRelocs.ToArray(); @@ -491,6 +574,12 @@ private void PublishCode() #endif _methodCodeNode.InitializeFrameInfos(_frameInfos); +#if !READYTORUN + if (TryGetUnixPacRetAddrLocationToEntrySpDelta(_frameInfos, out uint PacRetAddrLocationToEntrySpDelta)) + { + _methodCodeNode.InitializeArm64PacHijackInfo(PacRetAddrLocationToEntrySpDelta); + } +#endif #if READYTORUN _methodCodeNode.InitializeColdFrameInfos(_coldFrameInfos); #endif @@ -843,12 +932,12 @@ private bool Get_CORINFO_METHOD_INFO(MethodDesc method, MethodIL methodIL, CORIN private Dictionary _instantiationToJitVisibleInstantiation; private CORINFO_CLASS_STRUCT_** GetJitInstantiation(Instantiation inst) { - IntPtr [] jitVisibleInstantiation; + IntPtr[] jitVisibleInstantiation; _instantiationToJitVisibleInstantiation ??= new Dictionary(); if (!_instantiationToJitVisibleInstantiation.TryGetValue(inst, out jitVisibleInstantiation)) { - jitVisibleInstantiation = new IntPtr[inst.Length]; + jitVisibleInstantiation = new IntPtr[inst.Length]; for (int i = 0; i < inst.Length; i++) jitVisibleInstantiation[i] = (IntPtr)ObjectToHandle(inst[i]); _instantiationToJitVisibleInstantiation.Add(inst, jitVisibleInstantiation); @@ -1071,7 +1160,7 @@ private TypeSystemEntity entityFromContext(CORINFO_CONTEXT_STRUCT* contextStruct { if (contextStruct == contextFromMethodBeingCompiled()) { - return MethodBeingCompiled.HasInstantiation ? (TypeSystemEntity)MethodBeingCompiled: (TypeSystemEntity)MethodBeingCompiled.OwningType; + return MethodBeingCompiled.HasInstantiation ? 
(TypeSystemEntity)MethodBeingCompiled : (TypeSystemEntity)MethodBeingCompiled.OwningType; } return (TypeSystemEntity)HandleToObject((void*)((nuint)contextStruct & ~(nuint)CorInfoContextFlags.CORINFO_CONTEXTFLAGS_MASK)); @@ -1921,33 +2010,33 @@ private void resolveToken(ref CORINFO_RESOLVED_TOKEN pResolvedToken) } } else - if (result is FieldDesc) - { - FieldDesc field = result as FieldDesc; + if (result is FieldDesc) + { + FieldDesc field = result as FieldDesc; - // References to literal fields from IL body should never resolve. - // The CLR would throw a MissingFieldException while jitting and so should we. - if (field.IsLiteral) - ThrowHelper.ThrowMissingFieldException(field.OwningType, field.GetName()); + // References to literal fields from IL body should never resolve. + // The CLR would throw a MissingFieldException while jitting and so should we. + if (field.IsLiteral) + ThrowHelper.ThrowMissingFieldException(field.OwningType, field.GetName()); - pResolvedToken.hField = ObjectToHandle(field); + pResolvedToken.hField = ObjectToHandle(field); - TypeDesc owningClass = field.OwningType; - pResolvedToken.hClass = ObjectToHandle(owningClass); + TypeDesc owningClass = field.OwningType; + pResolvedToken.hClass = ObjectToHandle(owningClass); #if !SUPPORT_JIT - _compilation.TypeSystemContext.EnsureLoadableType(owningClass); + _compilation.TypeSystemContext.EnsureLoadableType(owningClass); #endif #if !READYTORUN - _compilation.NodeFactory.MetadataManager.GetDependenciesDueToAccess(ref _additionalDependencies, _compilation.NodeFactory, (MethodIL)methodIL, field); + _compilation.NodeFactory.MetadataManager.GetDependenciesDueToAccess(ref _additionalDependencies, _compilation.NodeFactory, (MethodIL)methodIL, field); #else ValidateSafetyOfUsingTypeEquivalenceOfType(field.FieldType); #endif - } - else - { - TypeDesc type = (TypeDesc)result; + } + else + { + TypeDesc type = (TypeDesc)result; #if READYTORUN if (recordToken) @@ -1956,19 +2045,19 @@ private void resolveToken(ref 
CORINFO_RESOLVED_TOKEN pResolvedToken) } #endif - if (pResolvedToken.tokenType == CorInfoTokenKind.CORINFO_TOKENKIND_Newarr) - { - if (type.IsVoid) - ThrowHelper.ThrowInvalidProgramException(ExceptionStringID.InvalidProgramSpecific, methodIL.OwningMethod); + if (pResolvedToken.tokenType == CorInfoTokenKind.CORINFO_TOKENKIND_Newarr) + { + if (type.IsVoid) + ThrowHelper.ThrowInvalidProgramException(ExceptionStringID.InvalidProgramSpecific, methodIL.OwningMethod); - type = type.MakeArrayType(); - } - pResolvedToken.hClass = ObjectToHandle(type); + type = type.MakeArrayType(); + } + pResolvedToken.hClass = ObjectToHandle(type); #if !SUPPORT_JIT - _compilation.TypeSystemContext.EnsureLoadableType(type); + _compilation.TypeSystemContext.EnsureLoadableType(type); #endif - } + } pResolvedToken.pTypeSpec = null; pResolvedToken.cbTypeSpec = 0; @@ -2009,10 +2098,10 @@ private void findCallSiteSig(CORINFO_MODULE_STRUCT_* module, uint methTOK, CORIN result = WellKnownType.RuntimeMethodHandle; } else - if (pResolvedToken.hField != null) - { - result = WellKnownType.RuntimeFieldHandle; - } + if (pResolvedToken.hField != null) + { + result = WellKnownType.RuntimeFieldHandle; + } return ObjectToHandle(_compilation.TypeSystemContext.GetWellKnownType(result)); } @@ -2338,7 +2427,7 @@ public static int GetClassAlignmentRequirementStatic(DefType type) // private static bool ShouldAlign8(int dwR8Fields, int dwTotalFields) { - return dwR8Fields*2>dwTotalFields && dwR8Fields>=2; + return dwR8Fields * 2 > dwTotalFields && dwR8Fields >= 2; } private static bool ShouldAlign8(DefType type) @@ -3716,7 +3805,7 @@ private uint getThreadTLSIndex(ref void* ppIndirection) { throw new NotImplementedException("getThreadTLSIndex"); } private Dictionary _helperCache = new Dictionary(); - private void getHelperFtn(CorInfoHelpFunc ftnNum, CORINFO_CONST_LOOKUP *pNativeEntrypoint, CORINFO_METHOD_STRUCT_** pMethod) + private void getHelperFtn(CorInfoHelpFunc ftnNum, CORINFO_CONST_LOOKUP* pNativeEntrypoint, 
CORINFO_METHOD_STRUCT_** pMethod) { // We never return a method handle from the managed implementation of this method today if (pMethod != null) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/MethodAssociatedDataNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/MethodAssociatedDataNode.cs index 24f84ba902eef1..28b877a5c4160d 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/MethodAssociatedDataNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/MethodAssociatedDataNode.cs @@ -14,6 +14,7 @@ public enum AssociatedDataFlags : byte { None = 0, HasUnboxingStubTarget = 1, + HasArm64PacHijackData = 2, } /// @@ -58,6 +59,10 @@ public static bool MethodHasAssociatedData(IMethodNode methodNode) if (unboxThunk != null && unboxThunk.IsSpecialUnboxingThunk) return true; + IArm64PacHijackInfoNode arm64PacHijackInfoNode = methodNode as IArm64PacHijackInfoNode; + if (arm64PacHijackInfoNode != null && arm64PacHijackInfoNode.HasPacHijackInfo) + return true; + return false; } @@ -80,6 +85,13 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly) objData.EmitReloc(unboxThunkNode.GetUnboxingThunkTarget(factory), RelocType.IMAGE_REL_BASED_RELPTR32); } + IArm64PacHijackInfoNode pacHijackInfoNode = _methodNode as IArm64PacHijackInfoNode; + if (pacHijackInfoNode != null && pacHijackInfoNode.HasPacHijackInfo) + { + flags |= AssociatedDataFlags.HasArm64PacHijackData; + objData.EmitUInt(pacHijackInfoNode.PacRetAddrLocationToEntrySpDelta); + } + objData.EmitByte(flagsReservation, (byte)flags); return objData.ToObjectData(); diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/MethodCodeNode.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/MethodCodeNode.cs index 64920d89a0b951..d48874a5a4f20b 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/MethodCodeNode.cs +++ 
b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/MethodCodeNode.cs @@ -15,13 +15,15 @@ namespace ILCompiler.DependencyAnalysis { [DebuggerTypeProxy(typeof(MethodCodeNodeDebugView))] - public class MethodCodeNode : ObjectNode, IMethodBodyNode, INodeWithCodeInfo, INodeWithDebugInfo, ISpecialUnboxThunkNode, IMethodCodeNodeWithTypeSignature + public class MethodCodeNode : ObjectNode, IMethodBodyNode, INodeWithCodeInfo, INodeWithDebugInfo, ISpecialUnboxThunkNode, IMethodCodeNodeWithTypeSignature, IArm64PacHijackInfoNode { private MethodDesc _method; private ObjectData _methodCode; private FrameInfo[] _frameInfos; private byte[] _gcInfo; private MethodExceptionHandlingInfoNode _ehInfo; + private bool _HasPacHijackInfo; + private uint _PacRetAddrLocationToEntrySpDelta; private DebugLocInfo[] _debugLocInfos; private DebugVarInfo[] _debugVarInfos; private DebugEHClauseInfo[] _debugEHClauseInfos; @@ -43,7 +45,7 @@ public void SetCode(ObjectData data) _methodCode = data; } - public MethodDesc Method => _method; + public MethodDesc Method => _method; protected override string GetName(NodeFactory factory) => this.GetMangledName(factory.NameMangler); @@ -117,6 +119,8 @@ public ISymbolNode GetUnboxingThunkTarget(NodeFactory factory) public FrameInfo[] FrameInfos => _frameInfos; public byte[] GCInfo => _gcInfo; public MethodExceptionHandlingInfoNode EHInfo => _ehInfo; + public bool HasPacHijackInfo => _HasPacHijackInfo; + public uint PacRetAddrLocationToEntrySpDelta => _PacRetAddrLocationToEntrySpDelta; public ISymbolNode GetAssociatedDataNode(NodeFactory factory) { @@ -138,6 +142,13 @@ public void InitializeGCInfo(byte[] gcInfo) _gcInfo = gcInfo; } + public void InitializeArm64PacHijackInfo(uint retAddrLocationToEntrySpDelta) + { + Debug.Assert(!_HasPacHijackInfo); + _HasPacHijackInfo = true; + _PacRetAddrLocationToEntrySpDelta = retAddrLocationToEntrySpDelta; + } + public void InitializeEHInfo(ObjectData ehInfo) { Debug.Assert(_ehInfo == null); From 
e1cfd90e595a69de35efe57a614d88d493ef279a Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 9 Apr 2026 18:19:11 +0100 Subject: [PATCH 09/65] Use unwind info to determine value of SP at the time of signing using paciasp --- src/coreclr/inc/cfi.h | 2 +- src/coreclr/jit/codegenarm64.cpp | 169 +++--------- src/coreclr/jit/codegenarmarch.cpp | 27 +- src/coreclr/jit/unwindarm64.cpp | 14 +- src/coreclr/nativeaot/Runtime/ICodeManager.h | 1 - .../nativeaot/Runtime/arm64/MiscStubs.S | 1 - .../nativeaot/Runtime/arm64/MiscStubs.asm | 3 +- .../Runtime/unix/UnixNativeCodeManager.cpp | 158 ++++++++++- .../DependencyAnalysis/INodeWithCodeInfo.cs | 6 - .../tools/Common/JitInterface/CorInfoImpl.cs | 90 ------- .../MethodAssociatedDataNode.cs | 12 - .../ObjectWriter/Dwarf/DwarfCfiOpcode.cs | 2 +- .../DependencyAnalysis/MethodCodeNode.cs | 13 +- .../JitInterface/CorInfoImpl.RyuJit.cs | 4 +- src/coreclr/vm/arm64/asmhelpers.S | 4 +- src/coreclr/vm/arm64/asmhelpers.asm | 4 +- src/coreclr/vm/excep.cpp | 251 ++++++++++++++++-- src/coreclr/vm/excep.h | 2 +- src/coreclr/vm/threads.h | 4 +- src/coreclr/vm/threadsuspend.cpp | 52 ++-- 20 files changed, 470 insertions(+), 349 deletions(-) diff --git a/src/coreclr/inc/cfi.h b/src/coreclr/inc/cfi.h index 95a7a0077c8e25..a0f4c246a4b0b7 100644 --- a/src/coreclr/inc/cfi.h +++ b/src/coreclr/inc/cfi.h @@ -11,7 +11,7 @@ enum CFI_OPCODE CFI_DEF_CFA_REGISTER, // New register is used to compute CFA CFI_REL_OFFSET, // Register is saved at offset from the current CFA CFI_DEF_CFA, // Take address from register and add offset to it - CFI_NEGATE_RA_STATE, // Sign the return address in lr with paciaz + CFI_NEGATE_RA_STATE, // Sign the return address in lr with paciasp }; struct CFI_CODE diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index c5ec82510d89eb..b4cef5aabe4c73 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -213,25 +213,12 @@ void 
CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) { case 1: { - if (JitConfig.JitPacEnabled() != 0 && !m_compiler->IsAot()) - { - // Generate: - // ldp fp,lr,[sp] - // autiasp - // add sp, sp, #framesz - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); - m_compiler->unwindSaveRegPair(REG_FP, REG_LR, 0); - GetEmitter()->emitPacInEpilog(); - genStackPointerAdjustment(totalFrameSize, REG_SCRATCH, nullptr, /* reportUnwindData */ true); - } - else - { - // Generate: - // ldp fp,lr,[sp],#framesz - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, totalFrameSize, - INS_OPTS_POST_INDEX); - m_compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize); - } + // Generate: + // ldp fp,lr,[sp],#framesz + + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, totalFrameSize, + INS_OPTS_POST_INDEX); + m_compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize); break; } @@ -239,16 +226,12 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) { // Generate: // ldp fp,lr,[sp,#outsz] - // autiasp ; if PAC is enabled // add sp,sp,#framesz GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, m_compiler->lvaOutgoingArgSpaceSize); m_compiler->unwindSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize); - if (!m_compiler->IsAot()) - { - GetEmitter()->emitPacInEpilog(); - } + GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); m_compiler->unwindAllocStack(totalFrameSize); break; @@ -267,6 +250,11 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) } } + if (JitConfig.JitPacEnabled() != 0) + { + GetEmitter()->emitPacInEpilog(); + } + // For OSR, we must also adjust the SP to remove the Tier0 frame. 
// if (m_compiler->opts.IsOSR()) @@ -292,11 +280,6 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spAdjust); m_compiler->unwindAllocStack(spAdjust); } - - if (m_compiler->IsAot()) - { - GetEmitter()->emitPacInEpilog(); - } } //------------------------------------------------------------------------ @@ -507,28 +490,13 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, assert(!useSaveNextPair); if ((spOffset == 0) && (spDelta >= -512)) { - // We can use pre-indexed addressing when pointer authentication PAC is disabled. + // We can use pre-indexed addressing when the stack adjustment fits in the instruction. + // Generate: + // stp REG, REG + 1, [SP, #spDelta]! + // 64-bit STP offset range: -512 to 504, multiple of 8. assert(reg1 != REG_LR); - if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR) && !m_compiler->IsAot()) - { - // Generate: - // sub SP, SP, #spDelta - // paciasp - // stp REG, REG + 1, [SP] - assert(reg1 == REG_FP); - genStackPointerAdjustment(spDelta, REG_NA, nullptr, /* reportUnwindData */ true); - GetEmitter()->emitPacInProlog(); - GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); - m_compiler->unwindSaveRegPair(REG_FP, REG_LR, 0); - } - else - { - // Generate: - // stp REG, REG + 1, [SP, #spDelta]! - // 64-bit STP offset range: -512 to 504, multiple of 8. 
- GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_PRE_INDEX); - m_compiler->unwindSaveRegPairPreindexed(reg1, reg2, spDelta); - } + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_PRE_INDEX); + m_compiler->unwindSaveRegPairPreindexed(reg1, reg2, spDelta); needToSaveRegs = false; } else // (spOffset != 0) || (spDelta < -512) @@ -549,11 +517,6 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, assert((spOffset % 8) == 0); assert(reg1 != REG_LR); - if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR) && !m_compiler->IsAot()) - { - assert(reg1 == REG_FP); - GetEmitter()->emitPacInProlog(); - } GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); if (TargetOS::IsUnix && m_compiler->generateCFIUnwindCodes()) @@ -669,23 +632,10 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, assert(!useSaveNextPair); if ((spOffset == 0) && (spDelta <= 504)) { - if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR) && !m_compiler->IsAot()) - { - // We separate SP increment and loading FP/LR when PAC is enabled. - assert(reg1 == REG_FP); - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, 0); - m_compiler->unwindSaveRegPair(reg1, reg2, 0); - GetEmitter()->emitPacInEpilog(); - genStackPointerAdjustment(spDelta, REG_NA, nullptr, /* reportUnwindData */ true); - } - else - { - // Fold the SP change into this instruction. - // ldp reg1, reg2, [SP], #spDelta - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, - INS_OPTS_POST_INDEX); - m_compiler->unwindSaveRegPairPreindexed(reg1, reg2, -spDelta); - } + // Fold the SP change into this instruction. 
+ // ldp reg1, reg2, [SP], #spDelta + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_POST_INDEX); + m_compiler->unwindSaveRegPairPreindexed(reg1, reg2, -spDelta); } else // (spOffset != 0) || (spDelta > 504) { @@ -695,12 +645,6 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); m_compiler->unwindSaveRegPair(reg1, reg2, spOffset); - if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR) && !m_compiler->IsAot()) - { - assert(reg1 == REG_FP); - GetEmitter()->emitPacInEpilog(); - } - // generate add SP,SP,imm genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); } @@ -709,12 +653,6 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, { GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); - if ((JitConfig.JitPacEnabled() != 0) && (reg2 == REG_LR) && !m_compiler->IsAot()) - { - assert(reg1 == REG_FP); - GetEmitter()->emitPacInEpilog(); - } - if (TargetOS::IsUnix && m_compiler->generateCFIUnwindCodes()) { useSaveNextPair = false; @@ -1418,7 +1356,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) m_compiler->unwindBegProlog(); - if (m_compiler->IsAot()) + if (JitConfig.JitPacEnabled() != 0) { GetEmitter()->emitPacInProlog(); } @@ -1455,29 +1393,14 @@ void CodeGen::genFuncletProlog(BasicBlock* block) bool scratchRegIsZero = false; genAllocLclFrame(-genFuncletInfo.fiSpDelta1, REG_SCRATCH, &scratchRegIsZero, maskArgRegsLiveIn); genStackPointerAdjustment(genFuncletInfo.fiSpDelta1, REG_SCRATCH, nullptr, /* reportUnwindData */ true); - if (!m_compiler->IsAot()) - { - GetEmitter()->emitPacInProlog(); - } GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); m_compiler->unwindSaveRegPair(REG_FP, REG_LR, 0); } else { - if (JitConfig.JitPacEnabled() != 0 && !m_compiler->IsAot()) - { - // generate sub SP,SP,imm - 
genStackPointerAdjustment(genFuncletInfo.fiSpDelta1, REG_NA, nullptr, /* reportUnwindData */ true); - GetEmitter()->emitPacInProlog(); - GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); - m_compiler->unwindSaveRegPair(REG_FP, REG_LR, 0); - } - else - { - GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, - genFuncletInfo.fiSpDelta1, INS_OPTS_PRE_INDEX); - m_compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1); - } + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1, + INS_OPTS_PRE_INDEX); + m_compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1); } maskSaveRegsInt &= ~(RBM_LR | RBM_FP); // We've saved these now @@ -1496,10 +1419,6 @@ void CodeGen::genFuncletProlog(BasicBlock* block) assert(genFuncletInfo.fiSpDelta2 == 0); - if (!m_compiler->IsAot()) - { - GetEmitter()->emitPacInProlog(); - } GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSP_to_FPLR_save_delta); m_compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta); @@ -1508,15 +1427,13 @@ void CodeGen::genFuncletProlog(BasicBlock* block) } else if (genFuncletInfo.fiFrameType == 3) { - // Avoid using pre-indexed store when PAC is enabled. - if ((m_compiler->opts.IsOSR()) || ((JitConfig.JitPacEnabled() != 0) && !m_compiler->IsAot())) + if (m_compiler->opts.IsOSR()) { // With OSR we may see large values for fiSpDelta1 // We repurpose genAllocLclFram to do the necessary probing. 
bool scratchRegIsZero = false; genAllocLclFrame(-genFuncletInfo.fiSpDelta1, REG_SCRATCH, &scratchRegIsZero, maskArgRegsLiveIn); genStackPointerAdjustment(genFuncletInfo.fiSpDelta1, REG_SCRATCH, nullptr, /* reportUnwindData */ true); - GetEmitter()->emitPacInProlog(); GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); m_compiler->unwindSaveRegPair(REG_FP, REG_LR, 0); } @@ -1646,24 +1563,11 @@ void CodeGen::genFuncletEpilog() { // With OSR we may see large values for fiSpDelta1 // - if (m_compiler->opts.IsOSR() || ((JitConfig.JitPacEnabled() != 0) && !m_compiler->IsAot())) + if (m_compiler->opts.IsOSR()) { - ssize_t imm = 0; - regNumber tempReg = REG_SCRATCH; - - if (JitConfig.JitPacEnabled() != 0) - { - imm = genFuncletInfo.fiSP_to_FPLR_save_delta; - tempReg = REG_NA; - } - - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, imm); - m_compiler->unwindSaveRegPair(REG_FP, REG_LR, static_cast(imm)); - if (!m_compiler->IsAot()) - { - GetEmitter()->emitPacInEpilog(); - } - genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, tempReg, nullptr, /* reportUnwindData */ true); + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); + m_compiler->unwindSaveRegPair(REG_FP, REG_LR, 0); + genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, REG_SCRATCH, nullptr, /* reportUnwindData */ true); } else { @@ -1680,10 +1584,6 @@ void CodeGen::genFuncletEpilog() GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSP_to_FPLR_save_delta); m_compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta); - if (!m_compiler->IsAot()) - { - GetEmitter()->emitPacInEpilog(); - } // fiFrameType==2 constraints: assert(genFuncletInfo.fiSpDelta1 < 0); @@ -1697,12 +1597,11 @@ void CodeGen::genFuncletEpilog() else if (genFuncletInfo.fiFrameType == 3) { // With OSR we may see large values for fiSpDelta1 - // Avoid post-indexed load when PAC is 
enabled. - if (m_compiler->opts.IsOSR() || ((JitConfig.JitPacEnabled() != 0) && !m_compiler->IsAot())) + if (m_compiler->opts.IsOSR()) { GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); m_compiler->unwindSaveRegPair(REG_FP, REG_LR, 0); - GetEmitter()->emitPacInEpilog(); + genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, REG_SCRATCH, nullptr, /* reportUnwindData */ true); } else @@ -1745,7 +1644,7 @@ void CodeGen::genFuncletEpilog() } } - if (m_compiler->IsAot()) + if (JitConfig.JitPacEnabled() != 0) { GetEmitter()->emitPacInEpilog(); } diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index ff6a0c8e9195be..a715052624c623 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -4476,7 +4476,7 @@ void CodeGen::genPushCalleeSavedRegisters() } #endif // DEBUG - if (m_compiler->IsAot()) + if (JitConfig.JitPacEnabled() != 0) { GetEmitter()->emitPacInProlog(); } @@ -4538,20 +4538,9 @@ void CodeGen::genPushCalleeSavedRegisters() assert(totalFrameSize <= STACK_PROBE_BOUNDARY_THRESHOLD_BYTES); - if (JitConfig.JitPacEnabled() != 0 && !m_compiler->IsAot()) - { - // Avoid pre-indexed variant of store to save FP/LR when PAC is enabled. 
- genStackPointerAdjustment(-totalFrameSize, REG_SCRATCH, nullptr, /* reportUnwindData */ true); - GetEmitter()->emitPacInProlog(); - GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); - m_compiler->unwindSaveRegPair(REG_FP, REG_LR, 0); - } - else - { - GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize, - INS_OPTS_PRE_INDEX); - m_compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize); - } + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize, + INS_OPTS_PRE_INDEX); + m_compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize); maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR offset = (int)m_compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR @@ -4602,10 +4591,6 @@ void CodeGen::genPushCalleeSavedRegisters() assert(m_compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize); - if (!m_compiler->IsAot()) - { - GetEmitter()->emitPacInProlog(); - } GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, m_compiler->lvaOutgoingArgSpaceSize); m_compiler->unwindSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize); @@ -4720,10 +4705,6 @@ void CodeGen::genPushCalleeSavedRegisters() } else { - if (!m_compiler->IsAot()) - { - GetEmitter()->emitPacInProlog(); - } // No frame pointer (no chaining). assert((maskSaveRegsInt & RBM_FP) == 0); assert((maskSaveRegsInt & RBM_LR) != 0); diff --git a/src/coreclr/jit/unwindarm64.cpp b/src/coreclr/jit/unwindarm64.cpp index c950f50a11548b..f5696b1aa841b4 100644 --- a/src/coreclr/jit/unwindarm64.cpp +++ b/src/coreclr/jit/unwindarm64.cpp @@ -644,13 +644,13 @@ void Compiler::unwindPacSignLR() #if defined(FEATURE_CFI_SUPPORT) if (generateCFIUnwindCodes()) { - FuncInfoDsc* func = funCurrentFunc(); - UNATIVE_OFFSET cbProlog = 0; - if (compGeneratingProlog) + // Emit NEGATE_RA_STATE opcode in prologs. 
+ if (!compGeneratingProlog) { - cbProlog = unwindGetCurrentOffset(func); + return; } - + FuncInfoDsc* func = funCurrentFunc(); + UNATIVE_OFFSET cbProlog = unwindGetCurrentOffset(func); // Maps to DW_CFA_AARCH64_negate_ra_state createCfiCode(func, cbProlog, CFI_NEGATE_RA_STATE, DWARF_REG_ILLEGAL); @@ -658,7 +658,7 @@ void Compiler::unwindPacSignLR() } #endif // FEATURE_CFI_SUPPORT - // pac_sign_lr: 11111100: sign the return address in lr with paciaz + // pac_sign_lr: 11111100: sign the return address in lr with paciasp funCurrentFunc()->uwi.AddCode(0xFC); } @@ -1110,7 +1110,7 @@ void DumpUnwindInfo(Compiler* comp, } else if (b1 == 0xFC) { - // pac_sign_lr: 11111100 : sign the return address in lr with paciaz. + // pac_sign_lr: 11111100 : sign the return address in lr with paciasp. printf(" %02X pac_sign_lr\n", b1); } diff --git a/src/coreclr/nativeaot/Runtime/ICodeManager.h b/src/coreclr/nativeaot/Runtime/ICodeManager.h index fcd4b3792cf1db..cd1e61e7393fa7 100644 --- a/src/coreclr/nativeaot/Runtime/ICodeManager.h +++ b/src/coreclr/nativeaot/Runtime/ICodeManager.h @@ -115,7 +115,6 @@ enum class AssociatedDataFlags : unsigned char { None = 0, HasUnboxingStubTarget = 1, - HasArm64PacHijackData = 2, }; enum UnwindStackFrameFlags diff --git a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S index 2b020360e17700..b0c1e8da846c6f 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S +++ b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S @@ -19,7 +19,6 @@ // void* PacSignPtr(void *, void *); // This function sign the input pointer using zero as salt. -// To avoid failing on non-PAC enabled machines, we use paciaz (instead of paciza) which signs lr explicitly. // Thus we need to move input in lr, sign it and then copy it back to the result register. 
.arch_extension pauth LEAF_ENTRY PacSignPtr, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm index 0b41103f9ef30c..fae95d519db4d4 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm @@ -19,12 +19,11 @@ ; void* PacSignPtr(void *, void *); ; This function sign the input pointer using zero as salt. -; To avoid failing on non-PAC enabled machines, we use paciaz (instead of paciza) which signs lr explicitly. ; Thus we need to move input in lr, sign it and then copy it back to the result register. LEAF_ENTRY PacSignPtr mov x17, x0 mov x16, x1 - DCD 0xD503233F ; paciasp instruction in binary to avoid error while compiling with non-PAC enabled compilers + DCD 0xD503211F ; pacia1716 instruction in binary to avoid error while compiling with non-PAC enabled compilers mov x0, x17 ret LEAF_END PacSignPtr diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index e300e3221c9bf3..e8fedc070c8f51 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -82,7 +82,8 @@ static size_t readULEB(const uint8_t *&p, const uint8_t *end) { size_t result = 0; unsigned shift = 0; - while (p < end) { + while (p < end) + { uint8_t byte = *p++; result |= size_t(byte & 0x7F) << shift; if ((byte & 0x80) == 0) // clear top bit indicates the last by of the value @@ -92,23 +93,158 @@ static size_t readULEB(const uint8_t *&p, const uint8_t *end) return result; } +static ssize_t readSLEB(const uint8_t *&p, const uint8_t *end) +{ + ssize_t result = 0; + unsigned shift = 0; + uint8_t byte = 0; + + while (p < end) + { + byte = *p++; + result |= ssize_t(byte & 0x7F) << shift; + shift += 7; + if ((byte & 0x80) == 0) // clear top bit indicates the last byte of the value + { + break; + } + } + + if ((shift
< (sizeof(result) * 8)) && ((byte & 0x40) != 0)) + { + result |= -((ssize_t)1 << shift); + } + + return result; +} + static bool TryGetSpForPacSigning(UnixNativeMethodInfo *pNativeMethodInfo, - PTR_PTR_VOID ppvRetAddrLocation, - uintptr_t *pSpForArm64PacSign) + PTR_PTR_VOID ppvRetAddrLocation, + uintptr_t *pSpForPacSign) { - PTR_uint8_t pAssociatedData = GetAssociatedData(pNativeMethodInfo->pMainLSDA); - if (pAssociatedData == NULL || ppvRetAddrLocation == NULL) + if (ppvRetAddrLocation == NULL) + return false; + + const uint8_t* p = (const uint8_t*)pNativeMethodInfo->unwind_info; + uint32_t fdeLength = *dac_cast((uint8_t*)p); + const uint8_t* end = p + fdeLength; + p += sizeof(uint32_t); // FDE length + + if (*dac_cast((uint8_t*)p) == 0) return false; - AssociatedDataFlags flags = (AssociatedDataFlags)(*pAssociatedData++); - if ((static_cast(flags) & static_cast(AssociatedDataFlags::HasArm64PacHijackData)) == 0) + p += sizeof(uint32_t); // CIE pointer + p += sizeof(uint32_t); // PC start + p += sizeof(uint32_t); // function length + + size_t augmentationLength = readULEB(p, end); + if ((size_t)(end - p) < augmentationLength) return false; + p += augmentationLength; + + constexpr int DataAlignFactor = -4; + constexpr uint8_t ReturnAddressRegister = 30; + + int cfaOffset = 0; + int lrOffset = INT_MIN; + bool hasPac = false; + + while (p < end) + { + uint8_t op = *p++; + + if (op == DW_CFA_AARCH64_negate_ra_state) + { + hasPac = true; + continue; + } + + if ((op & 0xC0) == DW_CFA_advance_loc) + { + continue; + } + + if ((op & 0xC0) == DW_CFA_offset) + { + uint8_t dwarfReg = op & 0x3F; + ssize_t offsetFactor = (ssize_t)readULEB(p, end); + if (dwarfReg == ReturnAddressRegister) + { + lrOffset = cfaOffset + (int)(offsetFactor * DataAlignFactor); + } + continue; + } + + switch (op) + { + case DW_CFA_nop: + break; + + case DW_CFA_advance_loc1: + p += sizeof(uint8_t); + break; + + case DW_CFA_advance_loc2: + p += sizeof(uint16_t); + break; + + case DW_CFA_advance_loc4: + p 
+= sizeof(uint32_t); + break; + + case DW_CFA_offset_extended: + { + uint8_t dwarfReg = (uint8_t)readULEB(p, end); + ssize_t offsetFactor = (ssize_t)readULEB(p, end); + if (dwarfReg == ReturnAddressRegister) + { + lrOffset = cfaOffset + (int)(offsetFactor * DataAlignFactor); + } + break; + } - if ((static_cast(flags) & static_cast(AssociatedDataFlags::HasUnboxingStubTarget)) != 0) - pAssociatedData += sizeof(int32_t); + case DW_CFA_offset_extended_sf: + { + uint8_t dwarfReg = (uint8_t)readULEB(p, end); + ssize_t offsetFactor = readSLEB(p, end); + if (dwarfReg == ReturnAddressRegister) + { + lrOffset = cfaOffset + (int)(offsetFactor * DataAlignFactor); + } + break; + } + + case DW_CFA_def_cfa: + readULEB(p, end); // register + cfaOffset = (int)readULEB(p, end); + break; + + case DW_CFA_def_cfa_register: + readULEB(p, end); // register + break; + + case DW_CFA_def_cfa_offset: + cfaOffset = (int)readULEB(p, end); + break; + + case DW_CFA_def_cfa_sf: + readULEB(p, end); // register + cfaOffset = (int)(readSLEB(p, end) * DataAlignFactor); + break; + + case DW_CFA_def_cfa_offset_sf: + cfaOffset = (int)(readSLEB(p, end) * DataAlignFactor); + break; + + default: + return false; + } + } + + if (!hasPac || lrOffset == INT_MIN || cfaOffset < lrOffset) + return false; - uint32_t retAddrLocationToEntrySpDelta = *dac_cast(pAssociatedData); - *pSpForArm64PacSign = dac_cast(ppvRetAddrLocation) + retAddrLocationToEntrySpDelta; + *pSpForPacSign = dac_cast(ppvRetAddrLocation) + (cfaOffset - lrOffset); return true; } diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/INodeWithCodeInfo.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/INodeWithCodeInfo.cs index 3744e2c172417c..6885a341263b9f 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/INodeWithCodeInfo.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/INodeWithCodeInfo.cs @@ -65,10 +65,4 @@ public DebugEHClauseInfo(uint tryOffset, uint tryLength, uint handlerOffset, uin 
HandlerLength = handlerLength; } } - - public interface IArm64PacHijackInfoNode - { - bool HasPacHijackInfo { get; } - uint PacRetAddrLocationToEntrySpDelta { get; } - } } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs index d07512482971d9..2bb9786db075fc 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs @@ -440,89 +440,6 @@ private CompilationResult CompileMethodInternal(IMethodNode methodCodeNodeNeedin partial void DetermineIfCompilationShouldBeRetried(ref CompilationResult result); -#if !READYTORUN - private const short Arm64DwarfLrRegister = 30; - - private bool TryGetUnixPacRetAddrLocationToEntrySpDelta(FrameInfo[] frameInfos, out uint retAddrLocationToEntrySpDelta) - { - retAddrLocationToEntrySpDelta = 0; - - var target = _compilation.TypeSystemContext.Target; - if (target.Architecture != TargetArchitecture.ARM64 || target.OperatingSystem != TargetOS.Linux) - { - return false; - } - - if (frameInfos == null) - { - return false; - } - - foreach (FrameInfo frameInfo in frameInfos) - { - if ((frameInfo.Flags & (FrameInfoFlags.Handler | FrameInfoFlags.Filter)) != 0) - { - continue; - } - - byte[] blobData = frameInfo.BlobData; - if (blobData == null || blobData.Length == 0 || (blobData.Length % 8) != 0) - { - return false; - } - - short cfaRegister = -1; - int cfaOffset = 0; - int lrOffset = int.MinValue; - bool hasPac = false; - - for (int offset = 0; offset < blobData.Length; offset += 8) - { - CFI_OPCODE opcode = (CFI_OPCODE)blobData[offset + 1]; - short dwarfReg = BitConverter.ToInt16(blobData, offset + 2); - int cfiOffset = BitConverter.ToInt32(blobData, offset + 4); - - switch (opcode) - { - case CFI_OPCODE.CFI_DEF_CFA: - cfaRegister = dwarfReg; - cfaOffset = cfiOffset; - break; - - case CFI_OPCODE.CFI_DEF_CFA_REGISTER: - cfaRegister = dwarfReg; - break; - - case CFI_OPCODE.CFI_ADJUST_CFA_OFFSET: - cfaOffset += 
cfiOffset; - break; - - case CFI_OPCODE.CFI_REL_OFFSET: - if (dwarfReg == Arm64DwarfLrRegister) - { - lrOffset = cfiOffset; - } - break; - - case CFI_OPCODE.CFI_NEGATE_RA_STATE: - hasPac = true; - break; - } - } - - if (!hasPac || cfaRegister < 0 || cfaOffset < 0 || lrOffset == int.MinValue || cfaOffset < lrOffset) - { - return false; - } - - retAddrLocationToEntrySpDelta = checked((uint)(cfaOffset - lrOffset)); - return true; - } - - return false; - } -#endif - private void PublishCode() { var relocs = _codeRelocs.ToArray(); @@ -572,14 +489,7 @@ private void PublishCode() _methodCodeNode.ColdCodeNode = _methodColdCodeNode; } #endif - _methodCodeNode.InitializeFrameInfos(_frameInfos); -#if !READYTORUN - if (TryGetUnixPacRetAddrLocationToEntrySpDelta(_frameInfos, out uint PacRetAddrLocationToEntrySpDelta)) - { - _methodCodeNode.InitializeArm64PacHijackInfo(PacRetAddrLocationToEntrySpDelta); - } -#endif #if READYTORUN _methodCodeNode.InitializeColdFrameInfos(_coldFrameInfos); #endif diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/MethodAssociatedDataNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/MethodAssociatedDataNode.cs index 28b877a5c4160d..24f84ba902eef1 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/MethodAssociatedDataNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/MethodAssociatedDataNode.cs @@ -14,7 +14,6 @@ public enum AssociatedDataFlags : byte { None = 0, HasUnboxingStubTarget = 1, - HasArm64PacHijackData = 2, } /// @@ -59,10 +58,6 @@ public static bool MethodHasAssociatedData(IMethodNode methodNode) if (unboxThunk != null && unboxThunk.IsSpecialUnboxingThunk) return true; - IArm64PacHijackInfoNode arm64PacHijackInfoNode = methodNode as IArm64PacHijackInfoNode; - if (arm64PacHijackInfoNode != null && arm64PacHijackInfoNode.HasPacHijackInfo) - return true; - return false; } @@ -85,13 +80,6 @@ public override ObjectData 
GetData(NodeFactory factory, bool relocsOnly) objData.EmitReloc(unboxThunkNode.GetUnboxingThunkTarget(factory), RelocType.IMAGE_REL_BASED_RELPTR32); } - IArm64PacHijackInfoNode pacHijackInfoNode = _methodNode as IArm64PacHijackInfoNode; - if (pacHijackInfoNode != null && pacHijackInfoNode.HasPacHijackInfo) - { - flags |= AssociatedDataFlags.HasArm64PacHijackData; - objData.EmitUInt(pacHijackInfoNode.PacRetAddrLocationToEntrySpDelta); - } - objData.EmitByte(flagsReservation, (byte)flags); return objData.ToObjectData(); diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCfiOpcode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCfiOpcode.cs index 1e3a2a1f37b23e..e79f859da5efce 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCfiOpcode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCfiOpcode.cs @@ -12,6 +12,6 @@ internal enum CFI_OPCODE CFI_DEF_CFA_REGISTER, // New register is used to compute CFA CFI_REL_OFFSET, // Register is saved at offset from the current CFA CFI_DEF_CFA, // Take address from register and add offset to it. 
- CFI_NEGATE_RA_STATE, // Sign the return address in lr with paciaz + CFI_NEGATE_RA_STATE, // Sign the return address in lr with paciasp } } diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/MethodCodeNode.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/MethodCodeNode.cs index d48874a5a4f20b..d9ac127387a087 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/MethodCodeNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/MethodCodeNode.cs @@ -15,15 +15,13 @@ namespace ILCompiler.DependencyAnalysis { [DebuggerTypeProxy(typeof(MethodCodeNodeDebugView))] - public class MethodCodeNode : ObjectNode, IMethodBodyNode, INodeWithCodeInfo, INodeWithDebugInfo, ISpecialUnboxThunkNode, IMethodCodeNodeWithTypeSignature, IArm64PacHijackInfoNode + public class MethodCodeNode : ObjectNode, IMethodBodyNode, INodeWithCodeInfo, INodeWithDebugInfo, ISpecialUnboxThunkNode, IMethodCodeNodeWithTypeSignature { private MethodDesc _method; private ObjectData _methodCode; private FrameInfo[] _frameInfos; private byte[] _gcInfo; private MethodExceptionHandlingInfoNode _ehInfo; - private bool _HasPacHijackInfo; - private uint _PacRetAddrLocationToEntrySpDelta; private DebugLocInfo[] _debugLocInfos; private DebugVarInfo[] _debugVarInfos; private DebugEHClauseInfo[] _debugEHClauseInfos; @@ -119,8 +117,6 @@ public ISymbolNode GetUnboxingThunkTarget(NodeFactory factory) public FrameInfo[] FrameInfos => _frameInfos; public byte[] GCInfo => _gcInfo; public MethodExceptionHandlingInfoNode EHInfo => _ehInfo; - public bool HasPacHijackInfo => _HasPacHijackInfo; - public uint PacRetAddrLocationToEntrySpDelta => _PacRetAddrLocationToEntrySpDelta; public ISymbolNode GetAssociatedDataNode(NodeFactory factory) { @@ -142,13 +138,6 @@ public void InitializeGCInfo(byte[] gcInfo) _gcInfo = gcInfo; } - public void InitializeArm64PacHijackInfo(uint retAddrLocationToEntrySpDelta) - { - 
Debug.Assert(!_HasPacHijackInfo); - _HasPacHijackInfo = true; - _PacRetAddrLocationToEntrySpDelta = retAddrLocationToEntrySpDelta; - } - public void InitializeEHInfo(ObjectData ehInfo) { Debug.Assert(_ehInfo == null); diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs index b70ca36d73c333..35dc6604a9f9e5 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs @@ -102,7 +102,7 @@ private enum CFI_OPCODE CFI_DEF_CFA_REGISTER, // New register is used to compute CFA CFI_REL_OFFSET, // Register is saved at offset from the current CFA CFI_DEF_CFA, // Take address from register and add offset to it. - CFI_NEGATE_RA_STATE, // Sign the return address in lr with paciaz + CFI_NEGATE_RA_STATE, // Sign the return address in lr with paciasp } // Get the CFI data in the same shape as clang/LLVM generated one. This improves the compatibility with libunwind and other unwind solutions @@ -205,7 +205,7 @@ private static byte[] CompressARM64CFI(byte[] blobData) cfiWriter.Write((byte)codeOffset); cfiWriter.Write((byte)CFI_OPCODE.CFI_NEGATE_RA_STATE); cfiWriter.Write((short)-1); - cfiWriter.Write(cfaOffset); + cfiWriter.Write(0); } if (cfaRegister != -1) diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 761a6e08fbe7bf..4db43137c25a02 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -168,7 +168,7 @@ NESTED_END OnHijackTripThread, _TEXT #endif // FEATURE_HIJACK // void* PacStripPtr(void *); -// This function strips the pointer of PAC info that is passed as an agrument. +// This function strips the pointer of PAC info that is passed as an argument. // To avoid failing on non-PAC enabled machines, we use xpaclri (instead of xpaci) which strips lr explicitly. 
// Thus we move need to move input in lr, strip it and copy it back to the result register. .arch_extension pauth @@ -182,7 +182,7 @@ NESTED_END OnHijackTripThread, _TEXT // void* PacSignPtr(void *, void *); // This function sign the input pointer using zero as salt. -// To avoid failing on non-PAC enabled machines, we use paciaz (instead of paciza) which signs lr explicitly. +// To avoid failing on non-PAC enabled machines, we use pacia1716, which signs x17 using x16 as the modifier. // Thus we need to move input in lr, sign it and then copy it back to the result register. .arch_extension pauth LEAF_ENTRY PacSignPtr, _TEXT diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index e17c9389c06b1a..7ed08cb83b282f 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -332,12 +332,12 @@ NoFloatingPointRetVal ; void* PacSignPtr(void *, void *); ; This function sign the input pointer using zero as salt. -; To avoid failing on non-PAC enabled machines, we use paciaz (instead of paciza) which signs lr explicitly. +; To avoid failing on non-PAC enabled machines, we use pacia1716, which signs x17 using x16 as the modifier. ; Thus we need to move input in lr, sign it and then copy it back to the result register. LEAF_ENTRY PacSignPtr mov x17, x0 mov x16, x1 - DCD 0xD503233F ; paciasp instruction in binary to avoid error while compiling with non-PAC enabled compilers + DCD 0xD503211F ; pacia1716 instruction in binary to avoid error while compiling with non-PAC enabled compilers mov x0, $x17 ret LEAF_END PacSignPtr diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 6033ac677f3511..ebe182887b5717 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6350,10 +6350,27 @@ bool IsIPInEpilog(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, BOOL *pSaf } #if defined(TARGET_ARM64) -// This function is used to check if Pointer Authentication (PAC) is enabled for this stack frame or not. 
-bool IsPacPresent(EECodeInfo *pCodeInfo) +// Read the PAC state for a managed ARM64 frame and, when PAC is enabled, recover the +// SP value that was live when PACIASP signed the return address in LR. +bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR retAddrLocation, TADDR *pSpForPacSign) { + _ASSERTE(pContextToCheck != nullptr); _ASSERTE(pCodeInfo->IsValid()); + _ASSERTE(pSpForPacSign != nullptr); + + *pSpForPacSign = 0; + + // In prolog or epilog while the current frame is still being established or torn down we cannot retrieve correct SP reliably. + if (IsIPInProlog(pCodeInfo)) + { + return false; + } + + BOOL unused = TRUE; + if (IsIPInEpilog(pContextToCheck, pCodeInfo, &unused)) + { + return false; + } // Lookup the function entry for the IP PTR_RUNTIME_FUNCTION FunctionEntry = pCodeInfo->GetFunctionEntry(); @@ -6365,7 +6382,8 @@ bool IsPacPresent(EECodeInfo *pCodeInfo) _ASSERTE((FunctionEntry->UnwindData & 3) == 0); // Packed unwind data are not used with managed code ULONG_PTR UnwindDataPtr = (ULONG_PTR)(ImageBase + FunctionEntry->UnwindData); - // Read the header word. For unwind info layout details refer https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling?view=msvc-170#arm64-exception-handling-information + // For unwind info layout details refer https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling?view=msvc-170#arm64-exception-handling-information + // Read the header word. 
DWORD HeaderWord = *(DWORD*)UnwindDataPtr; UnwindDataPtr += 4; @@ -6377,8 +6395,8 @@ bool IsPacPresent(EECodeInfo *pCodeInfo) { EpilogScopeCount = *(DWORD*)UnwindDataPtr; UnwindDataPtr += 4; - UnwindWords = (EpilogScopeCount >> 16) & 0xff; - EpilogScopeCount &= 0xffff; + UnwindWords = (EpilogScopeCount >> 16) & 0xFF; + EpilogScopeCount &= 0xFFFF; } if ((HeaderWord & (1 << 21)) != 0) @@ -6389,39 +6407,232 @@ bool IsPacPresent(EECodeInfo *pCodeInfo) ULONG_PTR UnwindCodePtr = UnwindDataPtr + 4 * EpilogScopeCount; ULONG_PTR UnwindCodesEndPtr = UnwindCodePtr + 4 * UnwindWords; - while (UnwindCodePtr < UnwindCodesEndPtr) + auto GetUnwindOpSize = [](BYTE unwindCode) -> SIZE_T { - ULONG CurCode = *(BYTE*)UnwindCodePtr; - if ((CurCode & 0xfe) == 0xe4) // The last unwind code + if (unwindCode < 0xC0) + { + return 1; + } + else if (unwindCode < 0xE0) + { + return 2; + } + else + { + static const BYTE UnwindCodeSizeTable[32] = + { + 4,1,2,1,1,1,1,3, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 2,3,4,5,1,1,1,1 + }; + + return UnwindCodeSizeTable[unwindCode - 0xE0]; + } + }; + + ULONG unwindOpCount = 0; + for (ULONG_PTR unwindOpPtr = UnwindCodePtr; unwindOpPtr < UnwindCodesEndPtr;) + { + BYTE curCode = *(BYTE*)unwindOpPtr; + if ((curCode & 0xFE) == 0xE4) // end, end_c { break; } - if (CurCode == 0xFC) // Unwind code for PAC (pac_sign_lr) + SIZE_T unwindOpSize = GetUnwindOpSize(curCode); + if ((unwindOpPtr + unwindOpSize) > UnwindCodesEndPtr) { - return true; + return false; } - if (CurCode < 0xC0) + unwindOpCount++; + unwindOpPtr += unwindOpSize; + } + + ULONG_PTR* unwindOpStarts = (ULONG_PTR*)_alloca(unwindOpCount * sizeof(ULONG_PTR)); + ULONG unwindOpIndex = 0; + for (ULONG_PTR unwindOpPtr = UnwindCodePtr; unwindOpPtr < UnwindCodesEndPtr;) + { + BYTE curCode = *(BYTE*)unwindOpPtr; + if ((curCode & 0xFE) == 0xE4) // end, end_c { - UnwindCodePtr += 1; + break; } - else if (CurCode < 0xE0) + + SIZE_T unwindOpSize = GetUnwindOpSize(curCode); + if ((unwindOpPtr + unwindOpSize) > 
UnwindCodesEndPtr) { - UnwindCodePtr += 2; + return false; } - else + + unwindOpStarts[unwindOpIndex++] = unwindOpPtr; + unwindOpPtr += unwindOpSize; + } + + SSIZE_T currentSpOffset = 0; + SSIZE_T pacSpOffset = SSIZE_T_MIN; + SSIZE_T lrSlotOffset = SSIZE_T_MIN; + constexpr SSIZE_T PtrSize = 8; + + // ARM64 prolog unwind codes are stored in reverse prolog order. Replay them in prolog order so + // PACIASP captures the SP that was live when LR was originally signed. + while (unwindOpIndex != 0) + { + UnwindCodePtr = unwindOpStarts[--unwindOpIndex]; + ULONG CurCode = *(BYTE*)UnwindCodePtr; + + if ((CurCode & 0xE0) == 0x00) // alloc_s { - static const BYTE UnwindCodeSizeTable[32] = + currentSpOffset -= (CurCode & 0x1F) * 16; + continue; + } + + if ((CurCode & 0xE0) == 0x20) // save_r19r20_x + { + currentSpOffset -= (CurCode & 0x1F) * 8; + continue; + } + + if ((CurCode & 0xC0) == 0x40) // save_fplr + { + lrSlotOffset = currentSpOffset + ((CurCode & 0x3F) * 8) + PtrSize; + continue; + } + + if ((CurCode & 0xC0) == 0x80) // save_fplr_x + { + currentSpOffset -= ((CurCode & 0x3F) + 1) * 8; + lrSlotOffset = currentSpOffset + PtrSize; + continue; + } + + if ((CurCode & 0xF8) == 0xC0) // alloc_m + { + ULONG x = ((CurCode & 0x7) << 8) | *(BYTE*)(UnwindCodePtr + 1); + currentSpOffset -= x * 16; + continue; + } + + if ((CurCode & 0xFC) == 0xC8) // save_regp + { + continue; + } + + if ((CurCode & 0xFC) == 0xCC) // save_regp_x + { + ULONG z = *(BYTE*)(UnwindCodePtr + 1) & 0x3F; + currentSpOffset -= (z + 1) * 8; + continue; + } + + if ((CurCode & 0xFC) == 0xD0) // save_reg + { + BYTE nextCode = *(BYTE*)(UnwindCodePtr + 1); + ULONG x = ((CurCode & 0x3) << 2) | (nextCode >> 6); + ULONG z = nextCode & 0x3F; + if (x == 11) // R30 / LR is the 12th GP register in the save_reg encodings { - 4,1,2,1,1,1,1,3, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 2,3,4,5,1,1,1,1 - }; + lrSlotOffset = currentSpOffset + z * 8; + } + + continue; + } + + if ((CurCode & 0xFE) == 0xD4) // save_reg_x + { + BYTE 
nextCode = *(BYTE*)(UnwindCodePtr + 1); + ULONG x = ((CurCode & 0x1) << 3) | (nextCode >> 5); + currentSpOffset -= ((nextCode & 0x1F) + 1) * 8; + if (x == 11) // R30 / LR is the 12th GP register in the save_reg encodings + { + lrSlotOffset = currentSpOffset; + } + + continue; + } + + if ((CurCode & 0xFE) == 0xD6) // save_lrpair + { + ULONG z = *(BYTE*)(UnwindCodePtr + 1) & 0x3F; + lrSlotOffset = currentSpOffset + z * 8 + PtrSize; + continue; + } + + if ((CurCode & 0xFE) == 0xD8) // save_fregp + { + continue; + } + + if ((CurCode & 0xFE) == 0xDA) // save_fregp_x + { + ULONG z = *(BYTE*)(UnwindCodePtr + 1) & 0x3F; + currentSpOffset -= (z + 1) * 8; + continue; + } + + if ((CurCode & 0xFE) == 0xDC) // save_freg + { + continue; + } + + if (CurCode == 0xDE) // save_freg_x + { + ULONG z = *(BYTE*)(UnwindCodePtr + 1) & 0x1F; + currentSpOffset -= (z + 1) * 8; + continue; + } + + if (CurCode == 0xE0) // alloc_l + { + ULONG x = (*(BYTE*)(UnwindCodePtr + 1) << 16) | (*(BYTE*)(UnwindCodePtr + 2) << 8) | *(BYTE*)(UnwindCodePtr + 3); + currentSpOffset -= x * 16; + continue; + } + + if (CurCode == 0xE1) // set_fp + { + continue; + } + + if (CurCode == 0xE2) // add_fp + { + continue; + } - UnwindCodePtr += UnwindCodeSizeTable[CurCode - 0xE0]; + if (CurCode == 0xE3) // nop + { + continue; } + + if (CurCode == 0xE6) // save_next + { + continue; + } + + if (CurCode == 0xFC) // pac_sign_lr + { + if (pacSpOffset == SSIZE_T_MIN) + { + // Snapshot the SP delta for the PACIASP in prolog. 
+ pacSpOffset = currentSpOffset; + } + + continue; + } + + return false; } - return false; + if (pacSpOffset == SSIZE_T_MIN) + { + return true; + } + + if (lrSlotOffset == SSIZE_T_MIN) + { + return false; + } + + *pSpForPacSign = (TADDR)((SSIZE_T)retAddrLocation + pacSpOffset - lrSlotOffset); + return true; } #endif // TARGET_ARM64 diff --git a/src/coreclr/vm/excep.h b/src/coreclr/vm/excep.h index 6bf56ca341af4a..17564920f33e78 100644 --- a/src/coreclr/vm/excep.h +++ b/src/coreclr/vm/excep.h @@ -30,7 +30,7 @@ BOOL AdjustContextForJITHelpers(EXCEPTION_RECORD *pExceptionRecord, CONTEXT *pCo bool IsIPInProlog(EECodeInfo *pCodeInfo); bool IsIPInEpilog(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, BOOL *pSafeToInjectThreadAbort); #if defined(TARGET_ARM64) -bool IsPacPresent(EECodeInfo *pCodeInfo); +bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR retAddrLocation, TADDR *pSpForPacSign); #endif // TARGET_ARM64 #endif // FEATURE_HIJACK && (!TARGET_X86 || TARGET_UNIX) diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index 6daa9b0f69a5f6..b1c002347d03d3 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -2510,11 +2510,11 @@ class Thread private: #ifdef FEATURE_HIJACK - void HijackThread(ExecutionState *esb X86_ARG(ReturnKind returnKind) X86_ARG(bool hasAsyncRet) ARM64_ARG(bool isPacEnabledFrame)); + void HijackThread(ExecutionState *esb X86_ARG(ReturnKind returnKind) X86_ARG(bool hasAsyncRet)); VOID *m_pvHJRetAddr; // original return address (before hijack) #ifdef TARGET_ARM64 - VOID *m_pSp; // stack pointer of the frame being hijacked + VOID *m_pSpForPacSign; // stack pointer value that was used to sign LR with PACIASP #endif VOID **m_ppvHJRetAddrPtr; // place we bashed a new return address MethodDesc *m_HijackedFunction; // remember what we hijacked diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 767b1399a1dab0..18df951ca24ee9 100644 --- 
a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -4474,7 +4474,9 @@ struct ExecutionState bool m_IsInterruptible; // is this code interruptible? MethodDesc *m_pFD; // current function/method we're executing VOID **m_ppvRetAddrPtr; // pointer to return address in frame - VOID *m_ppvSp; // pointer to stack pointer in frame +#if defined(TARGET_ARM64) + VOID *m_pSpForPacSign; // stack pointer value that was used to sign LR with PACIASP +#endif DWORD m_RelOffset; // relative offset at which we're currently executing in this fcn IJitManager *m_pJitManager; METHODTOKEN m_MethodToken; @@ -4482,14 +4484,16 @@ struct ExecutionState ExecutionState() { LIMITED_METHOD_CONTRACT; -#ifdef TARGET_X86 +#if defined(TARGET_X86) m_FirstPass = true; +#elif defined(TARGET_ARM64) + m_pSpForPacSign = nullptr; #endif } }; // Client is responsible for suspending the thread before calling -void Thread::HijackThread(ExecutionState *esb X86_ARG(ReturnKind returnKind) X86_ARG(bool hasAsyncRet) ARM64_ARG(bool isPacEnabledFrame)) +void Thread::HijackThread(ExecutionState *esb X86_ARG(ReturnKind returnKind) X86_ARG(bool hasAsyncRet)) { CONTRACTL { NOTHROW; @@ -4547,8 +4551,8 @@ void Thread::HijackThread(ExecutionState *esb X86_ARG(ReturnKind returnKind) X86 m_pvHJRetAddr = *esb->m_ppvRetAddrPtr; #if defined(TARGET_ARM64) - m_pSp = esb->m_ppvSp; -#endif // TARGET_ARM64 + m_pSpForPacSign = esb->m_pSpForPacSign; +#endif IS_VALID_CODE_PTR((FARPROC) (TADDR)m_pvHJRetAddr); // TODO [DAVBR]: For the full fix for VsWhidbey 450273, the below @@ -4562,9 +4566,9 @@ void Thread::HijackThread(ExecutionState *esb X86_ARG(ReturnKind returnKind) X86 // Bash the stack to return to one of our stubs #if defined(TARGET_ARM64) - if (isPacEnabledFrame) + if (m_pSpForPacSign != nullptr) { - pvHijackAddr = PacSignPtr(pvHijackAddr, m_pSp); + pvHijackAddr = PacSignPtr(pvHijackAddr, m_pSpForPacSign); } #endif // TARGET_ARM64 @@ -4647,7 +4651,9 @@ StackWalkAction SWCB_GetExecutionState(CrawlFrame 
*pCF, VOID *pData) pES->m_pFD = pCF->GetFunction(); pES->m_MethodToken = pCF->GetMethodToken(); pES->m_ppvRetAddrPtr = 0; - pES->m_ppvSp = (void *) GetRegdisplaySP(pCF->GetRegisterSet()); +#if defined(TARGET_ARM64) + pES->m_pSpForPacSign = nullptr; +#endif pES->m_IsInterruptible = pCF->IsGcSafe(); pES->m_RelOffset = pCF->GetRelOffset(); pES->m_pJitManager = pCF->GetJitManager(); @@ -5269,13 +5275,18 @@ BOOL Thread::HandledJITCase() X86_ONLY(ReturnKind returnKind;) X86_ONLY(bool hasAsyncRet;) - ARM64_ONLY(bool isPacEnabledFrame;) + ARM64_ONLY(TADDR spForPacSign = 0;) if (GetReturnAddressHijackInfo(&codeInfo X86_ARG(&returnKind) X86_ARG(&hasAsyncRet))) { -#ifdef TARGET_ARM64 - isPacEnabledFrame = IsPacPresent(&codeInfo); -#endif - HijackThread(&esb X86_ARG(returnKind) X86_ARG(hasAsyncRet) ARM64_ARG(isPacEnabledFrame)); +#if defined(TARGET_ARM64) + if (!GetPacSignInfo(&ctx, &codeInfo, dac_cast(esb.m_ppvRetAddrPtr), &spForPacSign)) + { + continue; + } + + esb.m_pSpForPacSign = (PVOID)spForPacSign; +#endif // TARGET_ARM64 + HijackThread(&esb X86_ARG(returnKind) X86_ARG(hasAsyncRet)); } } } @@ -5825,11 +5836,16 @@ void HandleSuspensionForInterruptedThread(CONTEXT *interruptedContext) StackWalkerWalkingThreadHolder threadStackWalking(pThread); // Hijack the return address to point to the appropriate routine based on the method's return type. 
- ARM64_ONLY(bool isPacEnabledFrame); -#ifdef TARGET_ARM64 - isPacEnabledFrame = IsPacPresent(&codeInfo); -#endif - pThread->HijackThread(&executionState X86_ARG(returnKind) X86_ARG(hasAsyncRet) ARM64_ARG(isPacEnabledFrame)); + ARM64_ONLY(TADDR spForPacSign = 0;) +#if defined(TARGET_ARM64) + if (!GetPacSignInfo(interruptedContext, &codeInfo, dac_cast(executionState.m_ppvRetAddrPtr), &spForPacSign)) + { + return; + } + + executionState.m_pSpForPacSign = (PVOID)spForPacSign; +#endif // TARGET_ARM64 + pThread->HijackThread(&executionState X86_ARG(returnKind) X86_ARG(hasAsyncRet)); } } From cfe6ab881679d6b39f57c91f8c30f16f12cd62e0 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Fri, 10 Apr 2026 10:03:51 +0100 Subject: [PATCH 10/65] Fix windows build --- src/coreclr/vm/threadsuspend.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 18df951ca24ee9..e8106232f7d9fc 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -5281,7 +5281,7 @@ BOOL Thread::HandledJITCase() #if defined(TARGET_ARM64) if (!GetPacSignInfo(&ctx, &codeInfo, dac_cast(esb.m_ppvRetAddrPtr), &spForPacSign)) { - continue; + return FALSE; } esb.m_pSpForPacSign = (PVOID)spForPacSign; From cefcd3fc992621f0a02bb4fc3c37d1f36621197c Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Fri, 10 Apr 2026 10:41:31 +0100 Subject: [PATCH 11/65] Fix register addressing in asmhelpers --- src/coreclr/vm/arm64/asmhelpers.asm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 7ed08cb83b282f..fbd290d0144e7d 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -338,7 +338,7 @@ NoFloatingPointRetVal mov x17, x0 mov x16, x1 DCD 0xD503211F ; pacia1716 instruction in binary to avoid error while compiling with non-PAC enabled compilers - mov x0, $x17 + mov x0, x17 
ret LEAF_END PacSignPtr From 280870c998202f40e51516ac67fae5e436cb7cbb Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Fri, 10 Apr 2026 21:33:49 +0100 Subject: [PATCH 12/65] Refactor out IsPacPresent to avoid parsing unwind info multiple times --- src/coreclr/nativeaot/Runtime/ICodeManager.h | 5 - src/coreclr/nativeaot/Runtime/thread.cpp | 2 +- .../Runtime/unix/UnixNativeCodeManager.cpp | 109 +++++++----------- .../Runtime/unix/UnixNativeCodeManager.h | 4 - .../Runtime/windows/CoffNativeCodeManager.cpp | 21 ++-- .../Runtime/windows/CoffNativeCodeManager.h | 2 - 6 files changed, 50 insertions(+), 93 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/ICodeManager.h b/src/coreclr/nativeaot/Runtime/ICodeManager.h index cd1e61e7393fa7..52765d516444c7 100644 --- a/src/coreclr/nativeaot/Runtime/ICodeManager.h +++ b/src/coreclr/nativeaot/Runtime/ICodeManager.h @@ -161,11 +161,6 @@ class ICodeManager virtual bool IsUnwindable(PTR_VOID pvAddress) PURE_VIRTUAL -#ifdef TARGET_ARM64 - virtual bool IsPacPresent(MethodInfo * pMethodInfo, - REGDISPLAY * pRegisterSet ) PURE_VIRTUAL -#endif - virtual bool GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in PTR_PTR_VOID * ppvRetAddrLocation, // out diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index 44cc5c0f5ce33f..4f2db0358ac578 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -839,7 +839,7 @@ void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, Hijack void* pvHijackedAddr = (void*)pfnHijackFunction; #if defined(TARGET_ARM64) - if (frameIterator->GetCodeManager()->IsPacPresent(frameIterator->GetMethodInfo(), frameIterator->GetRegisterSet())) + if (spForPacSign != 0) { pvHijackedAddr = PacSignPtr(pvHijackedAddr, (void*)spForPacSign); } diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp 
b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index e8fedc070c8f51..1f352aab9dd78f 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -118,13 +118,16 @@ static ssize_t readSLEB(const uint8_t *&p, const uint8_t *end) return result; } -static bool TryGetSpForPacSigning(UnixNativeMethodInfo *pNativeMethodInfo, - PTR_PTR_VOID ppvRetAddrLocation, - uintptr_t *pSpForPacSign) +struct PacFrameInfo { - if (ppvRetAddrLocation == NULL) - return false; + bool hasPac; + int cfaOffset; + int lrOffset; +}; +static bool TryGetPacFrameInfo(UnixNativeMethodInfo *pNativeMethodInfo, + PacFrameInfo *pPacFrameInfo) +{ const uint8_t* p = (const uint8_t*)pNativeMethodInfo->unwind_info; uint32_t fdeLength = *dac_cast((uint8_t*)p); const uint8_t* end = p + fdeLength; @@ -241,73 +244,27 @@ static bool TryGetSpForPacSigning(UnixNativeMethodInfo *pNativeMethodInfo, } } - if (!hasPac || lrOffset == INT_MIN || cfaOffset < lrOffset) - return false; - - *pSpForPacSign = dac_cast(ppvRetAddrLocation) + (cfaOffset - lrOffset); + pPacFrameInfo->hasPac = hasPac; + pPacFrameInfo->cfaOffset = cfaOffset; + pPacFrameInfo->lrOffset = lrOffset; return true; } -bool UnixNativeCodeManager::IsPacPresent(MethodInfo * pMethodInfo, - REGDISPLAY * pRegisterSet) +static bool TryGetSpForPacSigning(const PacFrameInfo& pacFrameInfo, + PTR_PTR_VOID ppvRetAddrLocation, + uintptr_t *pSpForPacSign) { - UnixNativeMethodInfo* pNativeMethodInfo = (UnixNativeMethodInfo*)pMethodInfo; - const uint8_t *p = (uint8_t *) pNativeMethodInfo->unwind_info; - const uint8_t *end = p + *((uint32_t *)p); - p += 4; // Skip length - assert(*((uint32_t *)p) != 0); // Ensure it's FDE entry - p += 4; // Skip offset to CIE - p += 4; // Skip PC start - p += 4; // Skip function length - size_t augmentationLength = readULEB(p, end); - p += augmentationLength; // skip augmentation data - - while (p < end) { - uint8_t op = *p++; - - if (op == 
DW_CFA_AARCH64_negate_ra_state) - { - return true; - } - - if ((op & 0xC0) == DW_CFA_advance_loc) - { - continue; - } - if ((op & ~(0x3F)) == DW_CFA_offset) - { - readULEB(p, end); // offset - continue; - } - - // Extended, single‐byte opcodes: - switch (op) { - case DW_CFA_advance_loc1: - case DW_CFA_def_cfa_register: - p++; // offset - break; - - case DW_CFA_offset_extended_sf: - case DW_CFA_offset_extended: - readULEB(p, end); // register - readULEB(p, end); // offset - break; - - case DW_CFA_def_cfa_offset: // DW_CFA_def_cfa_offset - readULEB(p, end); // offset - break; + if (!pacFrameInfo.hasPac) + { + *pSpForPacSign = 0; + return true; + } - case DW_CFA_def_cfa: // DW_CFA_def_cfa - p++; // register - readULEB(p, end); // offset - break; + if (ppvRetAddrLocation == NULL || pacFrameInfo.lrOffset == INT_MIN || pacFrameInfo.cfaOffset < pacFrameInfo.lrOffset) + return false; - default: // Unknown unwind op code - //TODO-PAC: Handle unknown op codes correctly. return false/assert false? - p++; - } - } - return false; + *pSpForPacSign = dac_cast(ppvRetAddrLocation) + (pacFrameInfo.cfaOffset - pacFrameInfo.lrOffset); + return true; } #endif // TARGET_ARM64 @@ -1182,6 +1139,18 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho } } + // Post-index restore sequences such as "ldp x19, x20, [sp], #0x10" also adjust SP + // before the final AUTIASP/RET. We avoid signing with a partially-restored SP. + int baseRegister = (instr >> 5) & 0x1f; + if (baseRegister == 31) + { + if ((instr & LDP_MASK2) == LDP_BITS2 || + (instr & LDR_MASK2) == LDR_BITS2) + { + return -1; + } + } + // Stack pointer adjustments can happen before AUTIASP/RET in some epilog layouts, // so treat them as being in the epilog as well. 
if ((instr & ADD_SP_SP_MASK) == ADD_SP_SP_BITS || @@ -1431,7 +1400,9 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn return false; #if defined(TARGET_ARM64) - bool pacPresent = IsPacPresent(pMethodInfo, pRegisterSet); + PacFrameInfo pacFrameInfo = {}; + bool hasPacFrameInfo = TryGetPacFrameInfo(pNativeMethodInfo, &pacFrameInfo); + bool pacPresent = hasPacFrameInfo && pacFrameInfo.hasPac; if (pacPresent) { // For PAC frames we only hijack locations where the current frame state is @@ -1469,7 +1440,7 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn *pSpForArm64PacSign = 0; *ppvRetAddrLocation = (PTR_PTR_VOID)(pRegisterSet->GetSP() + (sizeof(TADDR) * (epilogueInstructions - 1))); #if defined(TARGET_ARM64) - if (IsPacPresent(pMethodInfo, pRegisterSet) && !TryGetSpForPacSigning(pNativeMethodInfo, *ppvRetAddrLocation, pSpForArm64PacSign)) + if (!TryGetSpForPacSigning(pacFrameInfo, *ppvRetAddrLocation, pSpForArm64PacSign)) return false; #endif return true; @@ -1549,7 +1520,7 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn *ppvRetAddrLocation = (PTR_PTR_VOID)pRegisterSet->GetReturnAddressRegisterLocation(); #if defined(TARGET_ARM64) - if (IsPacPresent(pMethodInfo, pRegisterSet) && !TryGetSpForPacSigning(pNativeMethodInfo, *ppvRetAddrLocation, pSpForArm64PacSign)) + if (!TryGetSpForPacSigning(pacFrameInfo, *ppvRetAddrLocation, pSpForArm64PacSign)) { return false; } diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h index aa640e7f604b39..7d0969d85dedc2 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h @@ -74,10 +74,6 @@ class UnixNativeCodeManager : public ICodeManager PTR_PTR_VOID * ppvRetAddrLocation, // out uintptr_t * pSpForArm64PacSign);// out -#if defined(TARGET_ARM64) - bool IsPacPresent(MethodInfo * 
pMethodInfo, REGDISPLAY * pRegisterSet); -#endif // TARGET_ARM64 - PTR_VOID RemapHardwareFaultToGCSafePoint(MethodInfo * pMethodInfo, PTR_VOID controlPC); bool EHEnumInit(MethodInfo * pMethodInfo, PTR_VOID * pMethodStartAddress, EHEnumState * pEHEnumState); diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp index b801039f1eb2d5..ffe81497b1300d 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -830,22 +830,14 @@ bool CoffNativeCodeManager::IsUnwindable(PTR_VOID pvAddress) return true; } -#if defined(TARGET_ARM64) -bool CoffNativeCodeManager::IsPacPresent(MethodInfo * pMethodInfo, - REGDISPLAY * pRegisterSet) +static bool HasPacInUnwindInfo(PTR_VOID pUnwindDataBlob, size_t unwindDataBlobSize) { - CoffNativeMethodInfo * pNativeMethodInfo = (CoffNativeMethodInfo *)pMethodInfo; - - size_t unwindDataBlobSize; - - PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pNativeMethodInfo->runtimeFunction, &unwindDataBlobSize); - PTR_uint8_t UnwindCodePtr = dac_cast(pUnwindDataBlob); PTR_uint8_t UnwindCodesEndPtr = dac_cast(pUnwindDataBlob) + unwindDataBlobSize; - while (UnwindCodePtr < UnwindCodesEndPtr) + while (UnwindCodePtr < UnwindCodesEndPtr) { - uint8_t CurCode = * UnwindCodePtr; + uint8_t CurCode = *UnwindCodePtr; if ((CurCode & 0xfe) == 0xe4) // The last unwind code { break; @@ -930,7 +922,7 @@ bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn *ppvRetAddrLocation = (PTR_PTR_VOID)(context.Rsp - sizeof (PVOID)); return true; #elif defined(TARGET_ARM64) - *pSpForArm64PacSign = pRegisterSet->GetSP(); + *pSpForArm64PacSign = 0; if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) p += sizeof(int32_t); @@ -955,6 +947,11 @@ bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn return false; } + if 
(HasPacInUnwindInfo(pUnwindDataBlob, unwindDataBlobSize)) + { + *pSpForArm64PacSign = pRegisterSet->GetSP(); + } + context.Sp = pRegisterSet->GetSP(); context.Fp = pRegisterSet->GetFP(); context.Pc = pRegisterSet->GetIP(); diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h index d85735979b4681..dbeb3956d483ea 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h @@ -90,8 +90,6 @@ class CoffNativeCodeManager : public ICodeManager bool IsUnwindable(PTR_VOID pvAddress); - bool IsPacPresent(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet); - bool GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in PTR_PTR_VOID * ppvRetAddrLocation, // out From f186f197273a8ddc82f21394c6a0364aafe08287 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Fri, 10 Apr 2026 22:31:02 +0100 Subject: [PATCH 13/65] Bail out of RAH instead of hijacking calling frame --- .../Runtime/unix/UnixNativeCodeManager.cpp | 18 +++++++++++------- .../Runtime/windows/CoffNativeCodeManager.cpp | 1 + 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index 1f352aab9dd78f..ecac4b64735c20 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -1501,6 +1501,17 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn return false; } +#if defined(TARGET_ARM64) + if (pacPresent) + { + // We hijack the caller frame later. To retrieve signing SP for correct PAC + // processing, we need the pacFrameInfo for the caller frame. Currently bail + // out of hijacking in this case. 
+ // ToDo-PAC: Enable hijacking caller frame + return false; + } +#endif + PTR_uintptr_t oldLocation = pRegisterSet->GetReturnAddressRegisterLocation(); if (!VirtualUnwind(pMethodInfo, pRegisterSet)) { @@ -1519,13 +1530,6 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn *ppvRetAddrLocation = (PTR_PTR_VOID)pRegisterSet->GetReturnAddressRegisterLocation(); -#if defined(TARGET_ARM64) - if (!TryGetSpForPacSigning(pacFrameInfo, *ppvRetAddrLocation, pSpForArm64PacSign)) - { - return false; - } -#endif - return true; #else return false; diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp index ffe81497b1300d..04bd83ea1f9d42 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -830,6 +830,7 @@ bool CoffNativeCodeManager::IsUnwindable(PTR_VOID pvAddress) return true; } +#if defined(TARGET_ARM64) static bool HasPacInUnwindInfo(PTR_VOID pUnwindDataBlob, size_t unwindDataBlobSize) { PTR_uint8_t UnwindCodePtr = dac_cast(pUnwindDataBlob); From 941d9e2cf91e2b4ad62a66f4fd3cd053429f5847 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Mon, 13 Apr 2026 11:00:18 +0100 Subject: [PATCH 14/65] Remove left-over changes from MethodAssociated Data approach --- .../Runtime/unix/UnixNativeCodeManager.cpp | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index ecac4b64735c20..9a547d0a45f3d6 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -64,19 +64,6 @@ UnixNativeCodeManager::~UnixNativeCodeManager() { } -static PTR_uint8_t GetAssociatedData(PTR_uint8_t pLSDA) -{ - uint8_t unwindBlockFlags = *pLSDA++; - - if 
((unwindBlockFlags & UBF_FUNC_KIND_MASK) != UBF_FUNC_KIND_ROOT) - pLSDA += sizeof(int32_t); - - if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) == 0) - return NULL; - - return pLSDA + *dac_cast(pLSDA); -} - #if defined(TARGET_ARM64) static size_t readULEB(const uint8_t *&p, const uint8_t *end) { @@ -1681,7 +1668,17 @@ PTR_VOID UnixNativeCodeManager::GetAssociatedData(PTR_VOID ControlPC) if (!FindMethodInfo(ControlPC, (MethodInfo*)&methodInfo)) return NULL; - return dac_cast(::GetAssociatedData(methodInfo.pLSDA)); + PTR_uint8_t p = methodInfo.pLSDA; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_KIND_MASK) != UBF_FUNC_KIND_ROOT) + p += sizeof(uint32_t); + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) == 0) + return NULL; + + return dac_cast(p + *dac_cast(p)); } extern "C" void RegisterCodeManager(ICodeManager * pCodeManager, PTR_VOID pvStartRange, uint32_t cbRange); From a755fba04d30d06e9a1ab7dab83b4fb774d558b3 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Tue, 14 Apr 2026 13:38:13 +0100 Subject: [PATCH 15/65] Fallback to DWARF for PAC unwind codes on macos --- .../Compiler/ObjectWriter/MachObjectWriter.Aot.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/MachObjectWriter.Aot.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/MachObjectWriter.Aot.cs index 6cb3f71f117d88..840405a4350d22 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/MachObjectWriter.Aot.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/MachObjectWriter.Aot.cs @@ -160,6 +160,10 @@ private static uint GetArm64CompactUnwindCode(byte[] blobData) switch (opcode) { + case CFI_OPCODE.CFI_NEGATE_RA_STATE: + // Fall back to DWARF so the AArch64 negate_ra_state opcode is preserved for libunwind. 
+ return UNWIND_ARM64_MODE_DWARF; + case CFI_OPCODE.CFI_DEF_CFA_REGISTER: cfaRegister = dwarfReg; From 352b3fd9de941de2dddfe2cd6e2783162e211ece Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Tue, 21 Apr 2026 14:21:45 +0100 Subject: [PATCH 16/65] Use the stripped return address while creating a frame in OnHijackWorker --- src/coreclr/vm/threadsuspend.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 3fed3155afa894..918a66fb25e3a1 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -24,6 +24,7 @@ #if defined(TARGET_ARM64) extern "C" void* PacSignPtr(void* ptr, void* sp); +extern "C" void* PacStripPtr(void* ptr); #endif // TARGET_ARM64 bool ThreadSuspend::s_fSuspendRuntimeInProgress = false; @@ -4828,7 +4829,7 @@ void STDCALL OnHijackWorker(HijackArgs * pArgs) thread->ResetThreadState(Thread::TS_Hijacked); // Fix up our caller's stack, so it can resume from the hijack correctly - pArgs->ReturnAddress = (size_t)thread->m_pvHJRetAddr; + pArgs->ReturnAddress = (size_t)PacStripPtr(thread->m_pvHJRetAddr); // Build a frame so that stack crawling can proceed from here back to where // we will resume execution. 
From 264f1feec7b0d10a45ed65931e5b21ad238f0c4f Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Tue, 21 Apr 2026 14:23:42 +0100 Subject: [PATCH 17/65] Remove redundant left-over changes --- .../DependencyAnalysis/INodeWithCodeInfo.cs | 10 +-- .../tools/Common/JitInterface/CorInfoImpl.cs | 73 ++++++++++--------- .../DependencyAnalysis/MethodCodeNode.cs | 2 +- 3 files changed, 43 insertions(+), 42 deletions(-) diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/INodeWithCodeInfo.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/INodeWithCodeInfo.cs index 6885a341263b9f..c216ff2c57d632 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/INodeWithCodeInfo.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/INodeWithCodeInfo.cs @@ -8,12 +8,12 @@ namespace ILCompiler.DependencyAnalysis [Flags] public enum FrameInfoFlags { - Handler = 0x01, - Filter = 0x02, + Handler = 0x01, + Filter = 0x02, - HasEHInfo = 0x04, - ReversePInvoke = 0x08, - HasAssociatedData = 0x10, + HasEHInfo = 0x04, + ReversePInvoke = 0x08, + HasAssociatedData = 0x10, } public struct FrameInfo : IEquatable diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs index dd447f9dd2a321..ae543131333654 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs @@ -117,7 +117,7 @@ public LikelyClassMethodRecord(IntPtr handle, uint likelihood) } [DllImport(JitLibrary)] - private static extern uint getLikelyClasses(LikelyClassMethodRecord* pLikelyClasses, uint maxLikelyClasses, PgoInstrumentationSchema* schema, uint countSchemaItems, byte* pInstrumentationData, int ilOffset); + private static extern uint getLikelyClasses(LikelyClassMethodRecord* pLikelyClasses, uint maxLikelyClasses, PgoInstrumentationSchema* schema, uint countSchemaItems, byte*pInstrumentationData, int ilOffset); [DllImport(JitLibrary)] private static extern uint 
getLikelyMethods(LikelyClassMethodRecord* pLikelyMethods, uint maxLikelyMethods, PgoInstrumentationSchema* schema, uint countSchemaItems, byte* pInstrumentationData, int ilOffset); @@ -141,7 +141,7 @@ private static extern CorJitResult JitCompileMethod(out IntPtr exception, ref CORINFO_METHOD_INFO info, uint flags, out IntPtr nativeEntry, out uint codeSize); [DllImport(JitSupportLibrary)] - private static extern IntPtr AllocException([MarshalAs(UnmanagedType.LPWStr)] string message, int messageLength); + private static extern IntPtr AllocException([MarshalAs(UnmanagedType.LPWStr)]string message, int messageLength); [DllImport(JitSupportLibrary)] private static extern void JitSetOs(IntPtr jit, CORINFO_OS os); @@ -489,6 +489,7 @@ private void PublishCode() _methodCodeNode.ColdCodeNode = _methodColdCodeNode; } #endif + _methodCodeNode.InitializeFrameInfos(_frameInfos); #if READYTORUN _methodCodeNode.InitializeColdFrameInfos(_coldFrameInfos); @@ -843,12 +844,12 @@ private bool Get_CORINFO_METHOD_INFO(MethodDesc method, MethodIL methodIL, CORIN private Dictionary _instantiationToJitVisibleInstantiation; private CORINFO_CLASS_STRUCT_** GetJitInstantiation(Instantiation inst) { - IntPtr[] jitVisibleInstantiation; + IntPtr [] jitVisibleInstantiation; _instantiationToJitVisibleInstantiation ??= new Dictionary(); if (!_instantiationToJitVisibleInstantiation.TryGetValue(inst, out jitVisibleInstantiation)) { - jitVisibleInstantiation = new IntPtr[inst.Length]; + jitVisibleInstantiation = new IntPtr[inst.Length]; for (int i = 0; i < inst.Length; i++) jitVisibleInstantiation[i] = (IntPtr)ObjectToHandle(inst[i]); _instantiationToJitVisibleInstantiation.Add(inst, jitVisibleInstantiation); @@ -1071,7 +1072,7 @@ private TypeSystemEntity entityFromContext(CORINFO_CONTEXT_STRUCT* contextStruct { if (contextStruct == contextFromMethodBeingCompiled()) { - return MethodBeingCompiled.HasInstantiation ? 
(TypeSystemEntity)MethodBeingCompiled : (TypeSystemEntity)MethodBeingCompiled.OwningType; + return MethodBeingCompiled.HasInstantiation ? (TypeSystemEntity)MethodBeingCompiled: (TypeSystemEntity)MethodBeingCompiled.OwningType; } return (TypeSystemEntity)HandleToObject((void*)((nuint)contextStruct & ~(nuint)CorInfoContextFlags.CORINFO_CONTEXTFLAGS_MASK)); @@ -1919,33 +1920,33 @@ private void resolveToken(ref CORINFO_RESOLVED_TOKEN pResolvedToken) } } else - if (result is FieldDesc) - { - FieldDesc field = result as FieldDesc; + if (result is FieldDesc) + { + FieldDesc field = result as FieldDesc; - // References to literal fields from IL body should never resolve. - // The CLR would throw a MissingFieldException while jitting and so should we. - if (field.IsLiteral) - ThrowHelper.ThrowMissingFieldException(field.OwningType, field.GetName()); + // References to literal fields from IL body should never resolve. + // The CLR would throw a MissingFieldException while jitting and so should we. 
+ if (field.IsLiteral) + ThrowHelper.ThrowMissingFieldException(field.OwningType, field.GetName()); - pResolvedToken.hField = ObjectToHandle(field); + pResolvedToken.hField = ObjectToHandle(field); - TypeDesc owningClass = field.OwningType; - pResolvedToken.hClass = ObjectToHandle(owningClass); + TypeDesc owningClass = field.OwningType; + pResolvedToken.hClass = ObjectToHandle(owningClass); #if !SUPPORT_JIT - _compilation.TypeSystemContext.EnsureLoadableType(owningClass); + _compilation.TypeSystemContext.EnsureLoadableType(owningClass); #endif #if !READYTORUN - _compilation.NodeFactory.MetadataManager.GetDependenciesDueToAccess(ref _additionalDependencies, _compilation.NodeFactory, (MethodIL)methodIL, field); + _compilation.NodeFactory.MetadataManager.GetDependenciesDueToAccess(ref _additionalDependencies, _compilation.NodeFactory, (MethodIL)methodIL, field); #else ValidateSafetyOfUsingTypeEquivalenceOfType(field.FieldType); #endif - } - else - { - TypeDesc type = (TypeDesc)result; + } + else + { + TypeDesc type = (TypeDesc)result; #if READYTORUN if (recordToken) @@ -1954,19 +1955,19 @@ private void resolveToken(ref CORINFO_RESOLVED_TOKEN pResolvedToken) } #endif - if (pResolvedToken.tokenType == CorInfoTokenKind.CORINFO_TOKENKIND_Newarr) - { - if (type.IsVoid) - ThrowHelper.ThrowInvalidProgramException(ExceptionStringID.InvalidProgramSpecific, methodIL.OwningMethod); + if (pResolvedToken.tokenType == CorInfoTokenKind.CORINFO_TOKENKIND_Newarr) + { + if (type.IsVoid) + ThrowHelper.ThrowInvalidProgramException(ExceptionStringID.InvalidProgramSpecific, methodIL.OwningMethod); - type = type.MakeArrayType(); - } - pResolvedToken.hClass = ObjectToHandle(type); + type = type.MakeArrayType(); + } + pResolvedToken.hClass = ObjectToHandle(type); #if !SUPPORT_JIT - _compilation.TypeSystemContext.EnsureLoadableType(type); + _compilation.TypeSystemContext.EnsureLoadableType(type); #endif - } + } pResolvedToken.pTypeSpec = null; pResolvedToken.cbTypeSpec = 0; @@ -2007,10 
+2008,10 @@ private void findCallSiteSig(CORINFO_MODULE_STRUCT_* module, uint methTOK, CORIN result = WellKnownType.RuntimeMethodHandle; } else - if (pResolvedToken.hField != null) - { - result = WellKnownType.RuntimeFieldHandle; - } + if (pResolvedToken.hField != null) + { + result = WellKnownType.RuntimeFieldHandle; + } return ObjectToHandle(_compilation.TypeSystemContext.GetWellKnownType(result)); } @@ -2336,7 +2337,7 @@ public static int GetClassAlignmentRequirementStatic(DefType type) // private static bool ShouldAlign8(int dwR8Fields, int dwTotalFields) { - return dwR8Fields * 2 > dwTotalFields && dwR8Fields >= 2; + return dwR8Fields*2>dwTotalFields && dwR8Fields>=2; } private static bool ShouldAlign8(DefType type) @@ -3733,7 +3734,7 @@ private uint getThreadTLSIndex(ref void* ppIndirection) { throw new NotImplementedException("getThreadTLSIndex"); } private Dictionary _helperCache = new Dictionary(); - private void getHelperFtn(CorInfoHelpFunc ftnNum, CORINFO_CONST_LOOKUP* pNativeEntrypoint, CORINFO_METHOD_STRUCT_** pMethod) + private void getHelperFtn(CorInfoHelpFunc ftnNum, CORINFO_CONST_LOOKUP *pNativeEntrypoint, CORINFO_METHOD_STRUCT_** pMethod) { // We never return a method handle from the managed implementation of this method today if (pMethod != null) diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/MethodCodeNode.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/MethodCodeNode.cs index d9ac127387a087..64920d89a0b951 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/MethodCodeNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/MethodCodeNode.cs @@ -43,7 +43,7 @@ public void SetCode(ObjectData data) _methodCode = data; } - public MethodDesc Method => _method; + public MethodDesc Method => _method; protected override string GetName(NodeFactory factory) => this.GetMangledName(factory.NameMangler); From 
f455dc1701009608b1975e0eca5cf807c3bb37d3 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Tue, 21 Apr 2026 14:53:30 +0100 Subject: [PATCH 18/65] Mark PacStrip Arm64 only --- src/coreclr/vm/threadsuspend.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 918a66fb25e3a1..25d26fc9b401d5 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -4829,7 +4829,11 @@ void STDCALL OnHijackWorker(HijackArgs * pArgs) thread->ResetThreadState(Thread::TS_Hijacked); // Fix up our caller's stack, so it can resume from the hijack correctly +#if defined(TARGET_ARM64) pArgs->ReturnAddress = (size_t)PacStripPtr(thread->m_pvHJRetAddr); +#else + pArgs->ReturnAddress = (size_t)thread->m_pvHJRetAddr; +#endif //TARGET_ARM64 // Build a frame so that stack crawling can proceed from here back to where // we will resume execution. From a02fac873f2a0351b31cca420dfd9d68f784d470 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Wed, 22 Apr 2026 17:58:01 +0100 Subject: [PATCH 19/65] Temporarily Revert "JIT: Restore arm64, LA64 and RISCV64 OSR callee saves from tier0 frame (#126880)" for testing This reverts commit 653b477bd7178d7a9906d8d314ff7f5588756a1f. 
--- src/coreclr/inc/patchpointinfo.h | 4 - src/coreclr/jit/codegen.h | 49 +++--- src/coreclr/jit/codegenarm.cpp | 9 - src/coreclr/jit/codegenarm64.cpp | 217 ++++++------------------- src/coreclr/jit/codegenarmarch.cpp | 8 +- src/coreclr/jit/codegencommon.cpp | 113 +++++++++---- src/coreclr/jit/codegenloongarch64.cpp | 52 +----- src/coreclr/jit/codegenriscv64.cpp | 52 +----- src/coreclr/jit/codegenwasm.cpp | 13 +- src/coreclr/jit/codegenxarch.cpp | 19 +-- src/coreclr/jit/compiler.cpp | 23 +-- src/coreclr/vm/jithelpers.cpp | 5 +- src/coreclr/vm/jitinterface.cpp | 32 ---- src/coreclr/vm/method.cpp | 23 --- src/coreclr/vm/method.hpp | 8 - 15 files changed, 185 insertions(+), 442 deletions(-) diff --git a/src/coreclr/inc/patchpointinfo.h b/src/coreclr/inc/patchpointinfo.h index 1de678608de824..16e3c87d8a7137 100644 --- a/src/coreclr/inc/patchpointinfo.h +++ b/src/coreclr/inc/patchpointinfo.h @@ -47,8 +47,6 @@ struct PatchpointInfo m_keptAliveThisOffset = -1; m_securityCookieOffset = -1; m_monitorAcquiredOffset = -1; - m_asyncExecutionContextOffset = -1; - m_asyncSynchronizationContextOffset = -1; } // Copy @@ -60,8 +58,6 @@ struct PatchpointInfo m_keptAliveThisOffset = original->m_keptAliveThisOffset; m_securityCookieOffset = original->m_securityCookieOffset; m_monitorAcquiredOffset = original->m_monitorAcquiredOffset; - m_asyncExecutionContextOffset = original->m_asyncExecutionContextOffset; - m_asyncSynchronizationContextOffset = original->m_asyncSynchronizationContextOffset; for (uint32_t i = 0; i < original->m_numberOfLocals; i++) { diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 414042f6822bb0..8a9e8f06e4f428 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -348,7 +348,11 @@ class CodeGen final : public CodeGenInterface unsigned lclNum, unsigned offset, unsigned paramLclNum, const ABIPassingSegment& seg, class RegGraph* graph); void genSpillOrAddNonStandardRegisterParam(unsigned lclNum, regNumber sourceReg, class 
RegGraph* graph); void genEnregisterIncomingStackArgs(); +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) void genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed); +#else + void genEnregisterOSRArgsAndLocals(); +#endif void genHomeStackSegment(unsigned lclNum, const ABIPassingSegment& seg, regNumber initReg, bool* pInitRegZeroed); void genHomeSwiftStructStackParameters(); @@ -359,7 +363,6 @@ class CodeGen final : public CodeGenInterface void genClearStackVec3ArgUpperBits(); #endif // UNIX_AMD64_ABI && FEATURE_SIMD - void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); #if defined(TARGET_ARM64) bool genInstrWithConstant(instruction ins, emitAttr attr, @@ -381,23 +384,15 @@ class CodeGen final : public CodeGenInterface void genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero); - void genRestoreRegPair(regNumber reg1, - regNumber reg2, - regNumber baseReg, - int spOffset, - int spDelta, - bool useSaveNextPair, - regNumber tmpReg, - bool* pTmpRegIsZero, - bool reportUnwindData); - - void genRestoreReg(regNumber reg1, - regNumber baseReg, - int spOffset, - int spDelta, - regNumber tmpReg, - bool* pTmpRegIsZero, - bool reportUnwindData); + void genEpilogRestoreRegPair(regNumber reg1, + regNumber reg2, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero); + + void genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero); // A simple struct to keep register pairs for prolog and epilog. 
struct RegPair @@ -428,12 +423,13 @@ class CodeGen final : public CodeGenInterface static int genGetSlotSizeForRegsInMask(regMaskTP regsMask); void genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset); - void genRestoreCalleeSavedRegisterGroup( - regMaskTP regsMask, regNumber baseReg, int spDelta, int spOffset, bool reportUnwindData); + void genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset); void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta); void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta); + void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); + #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool genInstrWithConstant(instruction ins, emitAttr attr, @@ -446,14 +442,17 @@ class CodeGen final : public CodeGenInterface void genStackPointerAdjustment(ssize_t spAdjustment, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData); void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset); - void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, - regNumber baseReg, - int lowestCalleeSavedOffset, - bool reportUnwindData); + void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset); + void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); + +#else + void genPushCalleeSavedRegisters(); #endif - void genOSRHandleTier0CalleeSavedRegistersAndFrame(); +#if defined(TARGET_AMD64) + void genOSRRecordTier0CalleeSavedRegistersAndFrame(); void genOSRSaveRemainingCalleeSavedRegisters(); +#endif // TARGET_AMD64 void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn); diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index e28571af1629e2..2d2ab0026bd9b0 100644 --- 
a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -1853,15 +1853,6 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) #endif // PROFILING_SUPPORTED -//------------------------------------------------------------------------ -// genOSRHandleTier0CalleeSavedRegistersAndFrame: -// Not called for arm without OSR support. -// -void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() -{ - unreached(); -} - //------------------------------------------------------------------------ // genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer. // diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 8880e8fe6b2ebc..2b82d6a1095ec1 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -132,8 +132,7 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) // add sp,sp,#remainingFrameSz JITDUMP(" alignmentAdjustment2=%d\n", alignmentAdjustment2); - genRestoreRegPair(REG_FP, REG_LR, REG_SPBASE, alignmentAdjustment2, spAdjustment2, false, REG_IP1, - nullptr, /* reportUnwindData */ true); + genEpilogRestoreRegPair(REG_FP, REG_LR, alignmentAdjustment2, spAdjustment2, false, REG_IP1, nullptr); } else { @@ -154,8 +153,8 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) JITDUMP(" remainingFrameSz=%d\n", remainingFrameSz); - genRestoreRegPair(REG_FP, REG_LR, REG_SPBASE, m_compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, - false, REG_IP1, nullptr, /* reportUnwindData */ true); + genEpilogRestoreRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, false, + REG_IP1, nullptr); } // Unlike frameType=1 or frameType=2 that restore SP at the end, @@ -489,7 +488,6 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, if (spDelta != 0) { assert(!useSaveNextPair); - if ((spOffset == 0) && (spDelta >= -512)) { // We can use pre-indexed addressing when the stack adjustment fits in the 
instruction. @@ -594,7 +592,7 @@ void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNum } //------------------------------------------------------------------------ -// genRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog instead of the prolog. +// genEpilogRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog instead of the prolog. // The stack pointer adjustment, if requested, is done after the register restore, using post-index addressing. // The caller must ensure that we can use the LDP instruction, and that spOffset will be in the legal range for that // instruction. @@ -602,8 +600,7 @@ void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNum // Arguments: // reg1 - First register of pair to restore. // reg2 - Second register of pair to restore. -// baseReg - Base register to load values from -// spOffset - The offset from the base register to load reg1 +// spOffset - The offset from SP to load reg1 (must be positive or zero). // spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or // zero). // useSaveNextPair - True if the last prolog instruction was to save the previous register pair. This @@ -615,17 +612,15 @@ void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNum // Return Value: // None. 
-void CodeGen::genRestoreRegPair(regNumber reg1, - regNumber reg2, - regNumber baseReg, - int spOffset, - int spDelta, - bool useSaveNextPair, - regNumber tmpReg, - bool* pTmpRegIsZero, - bool reportUnwindData) +void CodeGen::genEpilogRestoreRegPair(regNumber reg1, + regNumber reg2, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero) { - assert((spOffset >= -512) && (spOffset <= 504)); + assert(spOffset >= 0); assert(spDelta >= 0); assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both @@ -639,53 +634,43 @@ void CodeGen::genRestoreRegPair(regNumber reg1, { // Fold the SP change into this instruction. // ldp reg1, reg2, [SP], #spDelta - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, baseReg, spDelta, INS_OPTS_POST_INDEX); - - if (reportUnwindData) - { - m_compiler->unwindSaveRegPairPreindexed(reg1, reg2, -spDelta); - } + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_POST_INDEX); + m_compiler->unwindSaveRegPairPreindexed(reg1, reg2, -spDelta); } else // (spOffset != 0) || (spDelta > 504) { // Can't fold in the SP change; need to use a separate ADD instruction. 
// ldp reg1, reg2, [SP, #offset] - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, baseReg, spOffset); - if (reportUnwindData) - { - m_compiler->unwindSaveRegPair(reg1, reg2, spOffset); - } + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); + m_compiler->unwindSaveRegPair(reg1, reg2, spOffset); // generate add SP,SP,imm - genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ reportUnwindData); + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); } } else { - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, baseReg, spOffset); + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); - if (reportUnwindData) + if (TargetOS::IsUnix && m_compiler->generateCFIUnwindCodes()) { - if (TargetOS::IsUnix && m_compiler->generateCFIUnwindCodes()) - { - useSaveNextPair = false; - } + useSaveNextPair = false; + } - if (useSaveNextPair) - { - m_compiler->unwindSaveNext(); - } - else - { - m_compiler->unwindSaveRegPair(reg1, reg2, spOffset); - } + if (useSaveNextPair) + { + m_compiler->unwindSaveNext(); + } + else + { + m_compiler->unwindSaveRegPair(reg1, reg2, spOffset); } } } //------------------------------------------------------------------------ -// genRestoreReg: The opposite of genPrologSaveReg(), run in the epilog instead of the prolog. +// genEpilogRestoreReg: The opposite of genPrologSaveReg(), run in the epilog instead of the prolog. // // Arguments: // reg1 - Register to restore. @@ -699,14 +684,9 @@ void CodeGen::genRestoreRegPair(regNumber reg1, // Return Value: // None. 
-void CodeGen::genRestoreReg(regNumber reg1, - regNumber baseReg, - int spOffset, - int spDelta, - regNumber tmpReg, - bool* pTmpRegIsZero, - bool reportUnwindData) +void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) { + assert(spOffset >= 0); assert(spDelta >= 0); assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned @@ -716,36 +696,24 @@ void CodeGen::genRestoreReg(regNumber reg1, { // We can use post-index addressing. // ldr REG, [SP], #spDelta - GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, baseReg, spDelta, INS_OPTS_POST_INDEX); - - if (reportUnwindData) - { - m_compiler->unwindSaveRegPreindexed(reg1, -spDelta); - } + GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, REG_SPBASE, spDelta, INS_OPTS_POST_INDEX); + m_compiler->unwindSaveRegPreindexed(reg1, -spDelta); } else // (spOffset != 0) || (spDelta > 255) { // ldr reg1, [SP, #offset] - GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, baseReg, spOffset); - - if (reportUnwindData) - { - m_compiler->unwindSaveReg(reg1, spOffset); - } + GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + m_compiler->unwindSaveReg(reg1, spOffset); // generate add SP,SP,imm - genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, reportUnwindData); + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); } } else { // ldr reg1, [SP, #offset] - GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, baseReg, spOffset); - - if (reportUnwindData) - { - m_compiler->unwindSaveReg(reg1, spOffset); - } + GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + m_compiler->unwindSaveReg(reg1, spOffset); } } @@ -1009,10 +977,9 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // Arguments: // regsMask - a mask of registers for epilog generation; // spDelta - if non-zero, the amount to add to SP after the last register restore 
(or together with it); -// spOffset - the offset from SP that is the top of the callee-saved register area; +// spOffset - the offset from SP that is the beginning of the callee-saved register area; // -void CodeGen::genRestoreCalleeSavedRegisterGroup( - regMaskTP regsMask, regNumber baseReg, int spDelta, int spOffset, bool reportUnwindData) +void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset) { const int slotSize = genGetSlotSizeForRegsInMask(regsMask); @@ -1038,19 +1005,18 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup( if (genReverseAndPairCalleeSavedRegisters) { - genRestoreRegPair(regPair.reg2, regPair.reg1, baseReg, spOffset, stackDelta, false, REG_IP1, nullptr, - reportUnwindData); + genEpilogRestoreRegPair(regPair.reg2, regPair.reg1, spOffset, stackDelta, false, REG_IP1, nullptr); } else { - genRestoreRegPair(regPair.reg1, regPair.reg2, baseReg, spOffset, stackDelta, regPair.useSaveNextPair, - REG_IP1, nullptr, reportUnwindData); + genEpilogRestoreRegPair(regPair.reg1, regPair.reg2, spOffset, stackDelta, regPair.useSaveNextPair, + REG_IP1, nullptr); } } else { spOffset -= slotSize; - genRestoreReg(regPair.reg1, baseReg, spOffset, stackDelta, REG_IP1, nullptr, reportUnwindData); + genEpilogRestoreReg(regPair.reg1, spOffset, stackDelta, REG_IP1, nullptr); } } } @@ -1122,23 +1088,20 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in { int spFrameDelta = (maskRestoreRegsFloat != RBM_NONE || maskRestoreRegsInt != RBM_NONE) ? 0 : spDelta; spOffset -= 2 * REGSIZE_BYTES; - genRestoreRegPair(REG_FP, REG_LR, REG_SPBASE, spOffset, spFrameDelta, false, REG_IP1, nullptr, - /* reportUnwindData */ true); + genEpilogRestoreRegPair(REG_FP, REG_LR, spOffset, spFrameDelta, false, REG_IP1, nullptr); } if (maskRestoreRegsInt != RBM_NONE) { int spIntDelta = (maskRestoreRegsFloat != RBM_NONE) ? 0 : spDelta; // should we delay the SP adjustment? 
- genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, REG_SPBASE, spIntDelta, spOffset, - /* reportUnwindData */ true); + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, spIntDelta, spOffset); spOffset -= genCountBits(maskRestoreRegsInt) * REGSIZE_BYTES; } if (maskRestoreRegsFloat != RBM_NONE) { // If there is any spDelta, it must be used here. - genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, REG_SPBASE, spDelta, spOffset, - /* reportUnwindData */ true); + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, spDelta, spOffset); // No need to update spOffset since it's not used after this. } } @@ -5586,90 +5549,6 @@ void CodeGen::genStoreLclTypeSimd12(GenTreeLclVarCommon* treeNode) #endif // FEATURE_SIMD -//----------------------------------------------------------------------------- -// genOSRHandleTier0CalleeSavedRegistersAndFrame: -// Handle the tier0 callee saves by restoring them from the original tier0 frame. -// Also report phantom unwind data for the allocated stack by the tier0 frame. -// -void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() -{ - assert(m_compiler->compGeneratingProlog); - assert(m_compiler->opts.IsOSR()); - assert(m_compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT); - - PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; - regMaskTP const tier0CalleeSaves((regMaskSmall)patchpointInfo->CalleeSaveRegisters()); - - JITDUMP("--OSR--- tier0 has already saved "); - JITDUMPEXEC(dspRegMask(tier0CalleeSaves)); - JITDUMP("\nEmitting restores\n"); - - // Note: the restore of LR relies on the tier0 method having been unhijacked when the OSR method prolog runs. - // This happens in the transition helper. If transition helper is not used (e.g. because we directly jump into OSR) - // then hijacking tier0 is not supported -- this is similar to tailcalls so the situation can be recorded via - // SetHasTailCalls. 
- - regMaskTP restoreRegsFrame = tier0CalleeSaves & (RBM_FP | RBM_LR); - regMaskTP restoreRegsFloat = tier0CalleeSaves & RBM_ALLFLOAT; - regMaskTP restoreRegsInt = tier0CalleeSaves & ~restoreRegsFrame & ~restoreRegsFloat; - - regNumber baseReg; - int topOfCalleeSaves; - if (restoreRegsFrame != RBM_NONE) - { - // FP/LR was saved with the callee saves. It is always at the top. - // Restore rest of callee saves with the offset from FP. - baseReg = REG_FP; - topOfCalleeSaves = 0; - } - else - { - // FP/LR was not saved with the callee saves. Here we do not actually - // know the offset from FP to the callee saves, but we do know the - // offset from SP. - baseReg = REG_SP; - topOfCalleeSaves = patchpointInfo->TotalFrameSize(); - if (m_compiler->info.compIsVarArgs) - { - topOfCalleeSaves -= MAX_REG_ARG * REGSIZE_BYTES; - } - - if ((topOfCalleeSaves > 504) && ((restoreRegsInt != RBM_NONE) || (restoreRegsFloat != RBM_NONE))) - { - // Too far to encode ldp with sp directly. Compute top into another register. - // Note: not reporting unwind nops for this as we will pad below anyway. - genInstrWithConstant(INS_add, EA_PTRSIZE, REG_IP0, REG_SP, topOfCalleeSaves, REG_IP0, - /* inUnwindRegion */ false); - baseReg = REG_IP0; - topOfCalleeSaves = 0; - } - } - - if (restoreRegsInt != RBM_NONE) - { - genRestoreCalleeSavedRegisterGroup(restoreRegsInt, baseReg, 0, topOfCalleeSaves, /* reportUnwindData */ false); - topOfCalleeSaves -= genCountBits(restoreRegsInt) * REGSIZE_BYTES; - } - - if (restoreRegsFloat != RBM_NONE) - { - genRestoreCalleeSavedRegisterGroup(restoreRegsFloat, baseReg, 0, topOfCalleeSaves, - /* reportUnwindData */ false); - topOfCalleeSaves -= genCountBits(restoreRegsFloat) * REGSIZE_BYTES; - } - - // Regardless of frame type fp always points to the saved fp/lr for frame - // pointer chaining purposes, so restoring them is trivial. 
- genRestoreRegPair(REG_FP, REG_LR, REG_FP, 0, 0, false, REG_IP1, nullptr, - /* reportUnwindData */ false); - - // Emit phantom unwind data for the tier0 frame. - m_compiler->unwindAllocStack(patchpointInfo->TotalFrameSize()); - // Emit nops to make the prolog 1:1 in unwind codes to instructions. This - // is needed for win-arm64. - m_compiler->unwindPadding(); -} - #ifdef PROFILING_SUPPORTED //----------------------------------------------------------------------------------- diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index af6f625c6372b0..9d03a501c2b88c 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -4278,12 +4278,16 @@ void CodeGen::genSIMDSplitReturn(GenTree* src, const ReturnTypeDesc* retTypeDesc //------------------------------------------------------------------------ // genPushCalleeSavedRegisters: Push any callee-saved registers we have used. // -// Arguments: +// Arguments (arm64): // initReg - A scratch register (that gets set to zero on some platforms). // pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'true' if this method sets initReg register to zero, // 'false' if initReg was set to a non-zero value, and left unchanged if initReg was not touched. 
// +#if defined(TARGET_ARM64) void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed) +#else +void CodeGen::genPushCalleeSavedRegisters() +#endif { assert(m_compiler->compGeneratingProlog); @@ -4772,7 +4776,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe JITDUMP(" spAdjustment2=%d\n", spAdjustment2); genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed); - offset += spAdjustment2; // Now subtract off the #outsz (or the rest of the #outsz if it was unaligned, and the above "sub" @@ -4799,7 +4802,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe { genPrologSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, initReg, pInitRegZeroed); - offset += remainingFrameSz; offsetSpToSavedFp = m_compiler->lvaOutgoingArgSpaceSize; diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index e3e810308d7afd..d01f10408d9bfe 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -4135,7 +4135,11 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, // initReg -- scratch register to use if needed // pInitRegZeroed -- [IN,OUT] if init reg is zero (on entry/exit) // +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) void CodeGen::genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed) +#else +void CodeGen::genEnregisterOSRArgsAndLocals() +#endif { assert(m_compiler->opts.IsOSR()); PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; @@ -5049,6 +5053,23 @@ void CodeGen::genFnProlog() genBeginFnProlog(); +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + // For arm64 OSR, emit a "phantom prolog" to account for the actions taken + // in the tier0 frame that impact FP and SP on entry to the OSR method. 
+ // + // x64 handles this differently; the phantom prolog unwind is emitted in + // genOSRRecordTier0CalleeSavedRegistersAndFrame. + // + if (m_compiler->opts.IsOSR()) + { + PatchpointInfo* patchpointInfo = m_compiler->info.compPatchpointInfo; + const int tier0FrameSize = patchpointInfo->TotalFrameSize(); + + // SP is tier0 method's SP. + m_compiler->unwindAllocStack(tier0FrameSize); + } +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + #ifdef DEBUG if (m_compiler->compJitHaltMethod()) @@ -5299,7 +5320,11 @@ void CodeGen::genFnProlog() const bool isRoot = (m_compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT); - const bool inheritsCalleeSaves = isRoot && m_compiler->opts.IsOSR(); +#ifdef TARGET_AMD64 + const bool isOSRx64Root = isRoot && m_compiler->opts.IsOSR(); +#else + const bool isOSRx64Root = false; +#endif // TARGET_AMD64 regMaskTP tempMask = initRegs & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd; @@ -5323,16 +5348,37 @@ void CodeGen::genFnProlog() } } - // For OSR root frames, we can't use any as of yet unsaved +#if defined(TARGET_AMD64) + // For x64 OSR root frames, we can't use any as of yet unsaved // callee save as initReg, as we defer saving these until later in // the prolog, and we don't have normal arg regs. - if (inheritsCalleeSaves) + if (isOSRx64Root) + { + initReg = REG_SCRATCH; // REG_EAX + } +#elif defined(TARGET_ARM64) + // For arm64 OSR root frames, we may need a scratch register for large + // offset addresses. Use a register that won't be allocated. + // + if (isRoot && m_compiler->opts.IsOSR()) { - initReg = REG_SCRATCH; -#if defined(TARGET_ARM64) initReg = REG_IP1; -#endif } +#elif defined(TARGET_LOONGARCH64) + // For LoongArch64 OSR root frames, we may need a scratch register for large + // offset addresses. Use a register that won't be allocated. 
+ if (isRoot && m_compiler->opts.IsOSR()) + { + initReg = REG_SCRATCH; + } +#elif defined(TARGET_RISCV64) + // For RISC-V64 OSR root frames, we may need a scratch register for large + // offset addresses. Use a register that won't be allocated. + if (isRoot && m_compiler->opts.IsOSR()) + { + initReg = REG_SCRATCH; // REG_T0 + } +#endif #if defined(TARGET_AMD64) // If we are a varargs call, in order to set up the arguments correctly this @@ -5366,29 +5412,30 @@ void CodeGen::genFnProlog() } #endif // TARGET_ARM #else // TARGET_WASM - regNumber initReg = REG_NA; - bool initRegZeroed = false; - bool inheritsCalleeSaves = false; + regNumber initReg = REG_NA; + bool initRegZeroed = false; + bool isOSRx64Root = false; #endif // TARGET_WASM unsigned extraFrameSize = 0; - if (inheritsCalleeSaves) +#ifdef TARGET_XARCH + +#ifdef TARGET_AMD64 + if (isOSRx64Root) { // Account for the Tier0 callee saves // - genOSRHandleTier0CalleeSavedRegistersAndFrame(); + genOSRRecordTier0CalleeSavedRegistersAndFrame(); -#ifdef TARGET_AMD64 // We don't actually push any callee saves on the OSR frame, // but we still reserve space, so account for this when // allocating the local frame. // extraFrameSize = m_compiler->compCalleeRegsPushed * REGSIZE_BYTES; -#endif } +#endif // TARGET_AMD64 -#ifdef TARGET_XARCH if (doubleAlignOrFramePointerUsed()) { // OSR methods handle "saving" FP specially. @@ -5397,7 +5444,7 @@ void CodeGen::genFnProlog() // Tier0 method. The save we do here is just to set up a // proper RBP-based frame chain link. // - if (inheritsCalleeSaves && isFramePointerUsed()) + if (isOSRx64Root && isFramePointerUsed()) { GetEmitter()->emitIns_R_AR(INS_mov, EA_8BYTE, initReg, REG_FPBASE, 0); inst_RV(INS_push, initReg, TYP_REF); @@ -5413,10 +5460,9 @@ void CodeGen::genFnProlog() inst_RV(INS_push, REG_FPBASE, TYP_REF); m_compiler->unwindPush(REG_FPBASE); } -#ifdef TARGET_X86 - // On x86 establish frame pointer now. For x64 we establish it after the "sub rsp". 
+#ifndef TARGET_AMD64 // On AMD64, establish the frame pointer after the "sub rsp" genEstablishFramePointer(0, /*reportUnwindData*/ true); -#endif // TARGET_X86 +#endif // !TARGET_AMD64 #if DOUBLE_ALIGN if (m_compiler->genDoubleAlign()) @@ -5430,20 +5476,16 @@ void CodeGen::genFnProlog() } #endif // TARGET_XARCH - bool pushesCalleeSaves = true; -#ifdef TARGET_AMD64 - // For OSR x64 we need canonical epilogs (sequence of pops). Hence we do - // not push any register in the prolog, we rather store them in the area - // allocated by the tier0 method. For OSR on other platforms we have no - // such requirement, instead we restore tier0 saved callee saves from its - // area on entry and then run the prolog as normal. - pushesCalleeSaves = !inheritsCalleeSaves; -#endif +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + genPushCalleeSavedRegisters(initReg, &initRegZeroed); + +#else // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 - if (pushesCalleeSaves) + if (!isOSRx64Root) { - genPushCalleeSavedRegisters(initReg, &initRegZeroed); + genPushCalleeSavedRegisters(); } +#endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 #ifdef TARGET_ARM bool needToEstablishFP = false; @@ -5488,14 +5530,14 @@ void CodeGen::genFnProlog() } #endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 - // For x64 OSR we have to finish saving callee saves. - // #ifdef TARGET_AMD64 - if (inheritsCalleeSaves) + // For x64 OSR we have to finish saving int callee saves. + // + if (isOSRx64Root) { genOSRSaveRemainingCalleeSavedRegisters(); } -#endif +#endif // TARGET_AMD64 //------------------------------------------------------------------------- @@ -5625,7 +5667,12 @@ void CodeGen::genFnProlog() // we've set the live-in regs with values from the Tier0 frame. // // Otherwise we'll do some of these fetches twice. 
+ +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) genEnregisterOSRArgsAndLocals(initReg, &initRegZeroed); +#else + genEnregisterOSRArgsAndLocals(); +#endif // OSR functions take no parameters in registers. Ensure no mappings // are present. assert((m_compiler->m_paramRegLocalMappings == nullptr) || m_compiler->m_paramRegLocalMappings->Empty()); diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index b7e548b874b233..5bca27b0247552 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -257,9 +257,7 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // // Arguments: // regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing. -// baseReg - Base register to use when loading values // lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. -// reportUnwindData - If true, report the change in unwind data. Otherwise, do not report it. // // Here's an example restore sequence: // ld.d s8,sp,#xxx @@ -275,10 +273,7 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // Return Value: // None. -void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, - regNumber baseReg, - int lowestCalleeSavedOffset, - bool reportUnwindData) +void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset) { // The FP and RA are not in RBM_CALLEE_SAVED. 
assert(!(regsToRestoreMask & (~RBM_CALLEE_SAVED))); @@ -299,12 +294,8 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, if (maskSaveRegs < 0) { highestCalleeSavedOffset -= REGSIZE_BYTES; - emit->emitIns_R_R_I(INS_fld_d, EA_8BYTE, (regNumber)regNum, baseReg, highestCalleeSavedOffset); - - if (reportUnwindData) - { - m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); - } + emit->emitIns_R_R_I(INS_fld_d, EA_8BYTE, (regNumber)regNum, REG_SP, highestCalleeSavedOffset); + m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); } maskSaveRegs <<= 1; regNum -= 1; @@ -318,12 +309,8 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, if (maskSaveRegs < 0) { highestCalleeSavedOffset -= REGSIZE_BYTES; - emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, (regNumber)regNum, baseReg, highestCalleeSavedOffset); - - if (reportUnwindData) - { - m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); - } + emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, (regNumber)regNum, REG_SP, highestCalleeSavedOffset); + m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); } maskSaveRegs <<= 1; regNum -= 1; @@ -332,31 +319,6 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, assert(highestCalleeSavedOffset >= 16); // the callee-saved regs always above ra/fp. } -//----------------------------------------------------------------------------- -// genOSRHandleTier0CalleeSavedRegistersAndFrame: -// Handle the tier0 callee saves by restoring them from the original tier0 frame. -// Also report phantom unwind data for the allocated stack by the tier0 frame. 
-// -void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() -{ - assert(m_compiler->compGeneratingProlog); - assert(m_compiler->opts.IsOSR()); - assert(m_compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT); - - PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; - regMaskTP tier0CalleeSaves(patchpointInfo->CalleeSaveRegisters()); - - JITDUMP("--OSR--- tier0 has already saved "); - JITDUMPEXEC(dspRegMask(tier0CalleeSaves)); - JITDUMP("\nEmitting restores\n"); - - genRestoreCalleeSavedRegistersHelp(tier0CalleeSaves & ~(RBM_FP | RBM_RA), REG_FP, 16, /* reportUnwindData */ false); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_FP, 8); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_FP, 0); - - m_compiler->unwindAllocStack(patchpointInfo->TotalFrameSize()); -} - // clang-format off /***************************************************************************** * @@ -542,7 +504,7 @@ void CodeGen::genFuncletEpilog(BasicBlock* /* block */) FP_offset = FP_offset & 0xf; } - genRestoreCalleeSavedRegistersHelp(maskSaveRegs, REG_SPBASE, FP_offset + 16, /* reportUnwindData */ true); + genRestoreCalleeSavedRegistersHelp(maskSaveRegs, FP_offset + 16); GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); m_compiler->unwindSaveReg(REG_RA, FP_offset + 8); @@ -6877,7 +6839,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) } JITDUMP(" calleeSaveSPOffset=%d\n", FP_offset + 16); - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, REG_SPBASE, FP_offset + 16, /* reportUnwindData */ true); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, FP_offset + 16); emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); m_compiler->unwindSaveReg(REG_RA, FP_offset + 8); diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 6732e8459e018a..86431cb877de10 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ 
b/src/coreclr/jit/codegenriscv64.cpp @@ -245,9 +245,7 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // // Arguments: // regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing. -// baseReg - Base register to use when loading values // lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. -// reportUnwindData - If true, report the change in unwind data. Otherwise, do not report it. // // Here's an example restore sequence: // ld s11, #xxx(sp) @@ -265,10 +263,7 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // Return Value: // None. -void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, - regNumber baseReg, - int lowestCalleeSavedOffset, - bool reportUnwindData) +void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset) { // The FP and RA are not in RBM_CALLEE_SAVED. 
assert(!(regsToRestoreMask & (~RBM_CALLEE_SAVED))); @@ -289,12 +284,8 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, if (maskSaveRegs < 0) { highestCalleeSavedOffset -= REGSIZE_BYTES; - emit->emitIns_R_R_I(INS_fld, EA_8BYTE, (regNumber)regNum, baseReg, highestCalleeSavedOffset); - - if (reportUnwindData) - { - m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); - } + emit->emitIns_R_R_I(INS_fld, EA_8BYTE, (regNumber)regNum, REG_SP, highestCalleeSavedOffset); + m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); } maskSaveRegs <<= 1; regNum -= 1; @@ -308,12 +299,8 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, if (maskSaveRegs < 0) { highestCalleeSavedOffset -= REGSIZE_BYTES; - emit->emitIns_R_R_I(INS_ld, EA_8BYTE, (regNumber)regNum, baseReg, highestCalleeSavedOffset); - - if (reportUnwindData) - { - m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); - } + emit->emitIns_R_R_I(INS_ld, EA_8BYTE, (regNumber)regNum, REG_SP, highestCalleeSavedOffset); + m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); } maskSaveRegs <<= 1; regNum -= 1; @@ -322,31 +309,6 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, assert(highestCalleeSavedOffset >= 16); // the callee-saved regs always above ra/fp. } -//----------------------------------------------------------------------------- -// genOSRHandleTier0CalleeSavedRegistersAndFrame: -// Handle the tier0 callee saves by restoring them from the original tier0 frame. -// Also report phantom unwind data for the allocated stack by the tier0 frame. 
-// -void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() -{ - assert(m_compiler->compGeneratingProlog); - assert(m_compiler->opts.IsOSR()); - assert(m_compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT); - - PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; - regMaskTP const tier0CalleeSaves(patchpointInfo->CalleeSaveRegisters()); - - JITDUMP("--OSR--- tier0 has already saved "); - JITDUMPEXEC(dspRegMask(tier0CalleeSaves)); - JITDUMP("\nEmitting restores\n"); - - genRestoreCalleeSavedRegistersHelp(tier0CalleeSaves & ~(RBM_FP | RBM_RA), REG_FP, 16, /* reportUnwindData */ false); - GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_FP, 8); - GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_FP, 0); - - m_compiler->unwindAllocStack(patchpointInfo->TotalFrameSize()); -} - // clang-format off /***************************************************************************** * @@ -530,7 +492,7 @@ void CodeGen::genFuncletEpilog(BasicBlock* /* block */) FP_offset = FP_offset & 0xf; } - genRestoreCalleeSavedRegistersHelp(maskSaveRegs, REG_SPBASE, FP_offset + 16, /* reportUnwindData */ true); + genRestoreCalleeSavedRegistersHelp(maskSaveRegs, FP_offset + 16); GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); m_compiler->unwindSaveReg(REG_RA, FP_offset + 8); @@ -6577,7 +6539,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) } JITDUMP(" calleeSaveSPOffset=%d\n", FP_offset + 16); - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, REG_SPBASE, FP_offset + 16, /* reportUnwindData */ true); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, FP_offset + 16); emit->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); m_compiler->unwindSaveReg(REG_RA, FP_offset + 8); diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index a8c072e8cacafe..0ce8d19640e56c 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ 
b/src/coreclr/jit/codegenwasm.cpp @@ -85,7 +85,7 @@ void CodeGen::genBeginFnProlog() //------------------------------------------------------------------------ // genPushCalleeSavedRegisters: no-op since we don't need to save anything. // -void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed) +void CodeGen::genPushCalleeSavedRegisters() { } @@ -137,20 +137,11 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni //------------------------------------------------------------------------ // genEnregisterOSRArgsAndLocals: enregister OSR args and locals. // -void CodeGen::genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed) +void CodeGen::genEnregisterOSRArgsAndLocals() { unreached(); // OSR not supported on WASM. } -//------------------------------------------------------------------------ -// genOSRHandleTier0CalleeSavedRegistersAndFrame: -// Not called for WASM without OSR support. -// -void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() -{ - unreached(); -} - //------------------------------------------------------------------------ // genHomeRegisterParams: place register arguments into their RA-assigned locations. // diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 30f278f1f32215..892d3f2def304f 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -9853,11 +9853,11 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) #ifdef TARGET_AMD64 //------------------------------------------------------------------------ -// genOSRHandleTier0CalleeSavedRegistersAndFrame: for OSR methods, record the +// genOSRRecordTier0CalleeSavedRegistersAndFrame: for OSR methods, record the // subset of callee saves already saved by the Tier0 method, and the frame // created by Tier0. 
// -void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() +void CodeGen::genOSRRecordTier0CalleeSavedRegistersAndFrame() { assert(m_compiler->compGeneratingProlog); assert(m_compiler->opts.IsOSR()); @@ -9999,29 +9999,18 @@ void CodeGen::genOSRSaveRemainingCalleeSavedRegisters() osrAdditionalIntCalleeSaves &= ~regBit; } } -#else - -//------------------------------------------------------------------------ -// genOSRHandleTier0CalleeSavedRegistersAndFrame: -// Not called for x86 without OSR support. -// -void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() -{ - unreached(); -} - #endif // TARGET_AMD64 //------------------------------------------------------------------------ // genPushCalleeSavedRegisters: Push any callee-saved registers we have used. // -void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed) +void CodeGen::genPushCalleeSavedRegisters() { assert(m_compiler->compGeneratingProlog); #if DEBUG // OSR root frames must handle this differently. See - // genOSRHandleTier0CalleeSavedRegistersAndFrame() + // genOSRRecordTier0CalleeSavedRegisters() // genOSRSaveRemainingCalleeSavedRegisters() // if (m_compiler->opts.IsOSR()) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index b1542a133ec3fd..9461a56ff9d71f 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5724,30 +5724,17 @@ void Compiler::generatePatchpointInfo() patchpointInfo->AsyncSynchronizationContextOffset()); } +#if defined(TARGET_AMD64) // Record callee save registers. + // Currently only needed for x64. // regMaskTP rsPushRegs = codeGen->regSet.rsGetModifiedCalleeSavedRegsMask(); rsPushRegs |= RBM_FPBASE; -#if defined(TARGET_ARM64) - rsPushRegs |= RBM_LR; -#elif defined(TARGET_LOONGARCH64) - rsPushRegs |= RBM_RA; -#elif defined(TARGET_RISCV64) - rsPushRegs |= RBM_RA; -#endif - -#ifdef TARGET_ARM64 - // For arm64 we communicate whether fp/lr are stored with the callee saves in this mask. 
- if (!codeGen->IsSaveFpLrWithAllCalleeSavedRegisters()) - { - rsPushRegs &= ~(RBM_FP | RBM_LR); - } -#endif - - patchpointInfo->SetCalleeSaveRegisters((uint64_t)rsPushRegs.getLow()); + patchpointInfo->SetCalleeSaveRegisters((uint64_t)rsPushRegs); JITDUMP("--OSR-- Tier0 callee saves: "); - JITDUMPEXEC(dspRegMask(regMaskTP((regMaskSmall)patchpointInfo->CalleeSaveRegisters()))); + JITDUMPEXEC(dspRegMask((regMaskTP)patchpointInfo->CalleeSaveRegisters())); JITDUMP("\n"); +#endif // Register this with the runtime. info.compCompHnd->setPatchpointInfo(patchpointInfo); diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index fef4e1175a2235..98d65c17cb2f43 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1756,10 +1756,11 @@ extern "C" void JIT_PatchpointWorkerWorkerWithPolicy(TransitionBlock * pTransiti SetSSP(pFrameContext, ssp - 8); } #endif // TARGET_WINDOWS -#endif // TARGET_AMD64 + + pFrameContext->Rbp = currentFP; +#endif // TARGET_AMD64 SetSP(pFrameContext, currentSP); - SetFP(pFrameContext, currentFP); // Note we can get here w/o triggering, if there is an existing OSR method and // we hit the patchpoint. 
diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index fda3b8af014b87..ffc5dd3f8b4bd9 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -11414,26 +11414,6 @@ void CEEJitInfo::setPatchpointInfo(PatchpointInfo* patchpointInfo) // We receive ownership of the array _ASSERTE(m_pPatchpointInfoFromJit == NULL); m_pPatchpointInfoFromJit = patchpointInfo; - -#if defined(_DEBUG) && defined(ALLOW_SXS_JIT) - if (m_jitFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_ALT_JIT)) - { - uint32_t ppiSize = patchpointInfo->PatchpointInfoSize(); - - AllocMemTracker am; - void* mem = am.Track(m_pMethodBeingCompiled->GetLoaderAllocator()->GetLowFrequencyHeap()->AllocMem(S_SIZE_T(ppiSize))); - PatchpointInfo *newPpi = new (mem) PatchpointInfo; - newPpi->Initialize(patchpointInfo->NumberOfLocals(), patchpointInfo->TotalFrameSize()); - newPpi->Copy(patchpointInfo); - - HRESULT hr = m_pMethodBeingCompiled->SetMethodDescAltJitPatchpointInfo(newPpi); - if (SUCCEEDED(hr)) - { - am.SuppressRelease(); - } - } -#endif - #else UNREACHABLE(); #endif @@ -11457,18 +11437,6 @@ PatchpointInfo* CEEJitInfo::getOSRInfo(unsigned* ilOffset) #ifdef FEATURE_ON_STACK_REPLACEMENT result = m_pPatchpointInfoFromRuntime; *ilOffset = m_ilOffset; - -#if defined(_DEBUG) && defined(ALLOW_SXS_JIT) - if (m_jitFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_ALT_JIT)) - { - PatchpointInfo* ppi = m_pMethodBeingCompiled->GetMethodDescAltJitPatchpointInfo(); - if (ppi != NULL) - { - result = ppi; - } - } -#endif - #endif EE_TO_JIT_TRANSITION(); diff --git a/src/coreclr/vm/method.cpp b/src/coreclr/vm/method.cpp index 79040c69c9e08b..48a41f486b8d01 100644 --- a/src/coreclr/vm/method.cpp +++ b/src/coreclr/vm/method.cpp @@ -274,29 +274,6 @@ void MethodDesc::SetMethodDescOptimizationTier(NativeCodeVersion::OptimizationTi _ASSERTE(m_codeData != NULL); VolatileStoreWithoutBarrier(&m_codeData->OptimizationTier, tier); } - -#if defined(_DEBUG) && defined(ALLOW_SXS_JIT) -HRESULT 
MethodDesc::SetMethodDescAltJitPatchpointInfo(PatchpointInfo* pInfo) -{ - WRAPPER_NO_CONTRACT; - - HRESULT hr; - IfFailRet(EnsureCodeDataExists(NULL)); - - _ASSERTE(m_codeData != NULL); - VolatileStoreWithoutBarrier(&m_codeData->AltJitPatchpointInfo, pInfo); - return S_OK; -} - -PatchpointInfo* MethodDesc::GetMethodDescAltJitPatchpointInfo() -{ - WRAPPER_NO_CONTRACT; - if (m_codeData == NULL) - return nullptr; - return VolatileLoadWithoutBarrier(&m_codeData->AltJitPatchpointInfo); -} -#endif // _DEBUG && ALLOW_SXS_JIT - #endif // FEATURE_CODE_VERSIONING #ifdef FEATURE_INTERPRETER diff --git a/src/coreclr/vm/method.hpp b/src/coreclr/vm/method.hpp index 3138b7327bd612..fa037eea4bd487 100644 --- a/src/coreclr/vm/method.hpp +++ b/src/coreclr/vm/method.hpp @@ -262,9 +262,6 @@ struct MethodDescCodeData final #ifdef FEATURE_INTERPRETER CallStubHeader *CallStub; #endif // FEATURE_INTERPRETER -#if defined(_DEBUG) && defined(ALLOW_SXS_JIT) - PatchpointInfo *AltJitPatchpointInfo; -#endif // _DEBUG && ALLOW_SXS_JIT }; using PTR_MethodDescCodeData = DPTR(MethodDescCodeData); @@ -1993,11 +1990,6 @@ class MethodDesc #ifndef DACCESS_COMPILE HRESULT SetMethodDescVersionState(PTR_MethodDescVersioningState state); void SetMethodDescOptimizationTier(NativeCodeVersion::OptimizationTier tier); - -#if defined(_DEBUG) && defined(ALLOW_SXS_JIT) - HRESULT SetMethodDescAltJitPatchpointInfo(PatchpointInfo* pInfo); - PatchpointInfo* GetMethodDescAltJitPatchpointInfo(); -#endif #endif // !DACCESS_COMPILE PTR_MethodDescVersioningState GetMethodDescVersionState(); NativeCodeVersion::OptimizationTier GetMethodDescOptimizationTier(); From 0d2d0ddb09ff5761cc1c5e7db63a8596ba5e5fe0 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 23 Apr 2026 10:40:17 +0100 Subject: [PATCH 20/65] Restore changes from "JIT: Restore arm64, LA64 and RISCV64 OSR callee saves from tier0 frame (#126880)" This reverts commit a02fac873f2a0351b31cca420dfd9d68f784d470. 
--- src/coreclr/inc/patchpointinfo.h | 4 + src/coreclr/jit/codegen.h | 49 +++--- src/coreclr/jit/codegenarm.cpp | 9 + src/coreclr/jit/codegenarm64.cpp | 217 +++++++++++++++++++------ src/coreclr/jit/codegenarmarch.cpp | 8 +- src/coreclr/jit/codegencommon.cpp | 113 ++++--------- src/coreclr/jit/codegenloongarch64.cpp | 52 +++++- src/coreclr/jit/codegenriscv64.cpp | 52 +++++- src/coreclr/jit/codegenwasm.cpp | 13 +- src/coreclr/jit/codegenxarch.cpp | 19 ++- src/coreclr/jit/compiler.cpp | 23 ++- src/coreclr/vm/jithelpers.cpp | 5 +- src/coreclr/vm/jitinterface.cpp | 32 ++++ src/coreclr/vm/method.cpp | 23 +++ src/coreclr/vm/method.hpp | 8 + 15 files changed, 442 insertions(+), 185 deletions(-) diff --git a/src/coreclr/inc/patchpointinfo.h b/src/coreclr/inc/patchpointinfo.h index 16e3c87d8a7137..1de678608de824 100644 --- a/src/coreclr/inc/patchpointinfo.h +++ b/src/coreclr/inc/patchpointinfo.h @@ -47,6 +47,8 @@ struct PatchpointInfo m_keptAliveThisOffset = -1; m_securityCookieOffset = -1; m_monitorAcquiredOffset = -1; + m_asyncExecutionContextOffset = -1; + m_asyncSynchronizationContextOffset = -1; } // Copy @@ -58,6 +60,8 @@ struct PatchpointInfo m_keptAliveThisOffset = original->m_keptAliveThisOffset; m_securityCookieOffset = original->m_securityCookieOffset; m_monitorAcquiredOffset = original->m_monitorAcquiredOffset; + m_asyncExecutionContextOffset = original->m_asyncExecutionContextOffset; + m_asyncSynchronizationContextOffset = original->m_asyncSynchronizationContextOffset; for (uint32_t i = 0; i < original->m_numberOfLocals; i++) { diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 8a9e8f06e4f428..414042f6822bb0 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -348,11 +348,7 @@ class CodeGen final : public CodeGenInterface unsigned lclNum, unsigned offset, unsigned paramLclNum, const ABIPassingSegment& seg, class RegGraph* graph); void genSpillOrAddNonStandardRegisterParam(unsigned lclNum, regNumber sourceReg, class 
RegGraph* graph); void genEnregisterIncomingStackArgs(); -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) void genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed); -#else - void genEnregisterOSRArgsAndLocals(); -#endif void genHomeStackSegment(unsigned lclNum, const ABIPassingSegment& seg, regNumber initReg, bool* pInitRegZeroed); void genHomeSwiftStructStackParameters(); @@ -363,6 +359,7 @@ class CodeGen final : public CodeGenInterface void genClearStackVec3ArgUpperBits(); #endif // UNIX_AMD64_ABI && FEATURE_SIMD + void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); #if defined(TARGET_ARM64) bool genInstrWithConstant(instruction ins, emitAttr attr, @@ -384,15 +381,23 @@ class CodeGen final : public CodeGenInterface void genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero); - void genEpilogRestoreRegPair(regNumber reg1, - regNumber reg2, - int spOffset, - int spDelta, - bool useSaveNextPair, - regNumber tmpReg, - bool* pTmpRegIsZero); - - void genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero); + void genRestoreRegPair(regNumber reg1, + regNumber reg2, + regNumber baseReg, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero, + bool reportUnwindData); + + void genRestoreReg(regNumber reg1, + regNumber baseReg, + int spOffset, + int spDelta, + regNumber tmpReg, + bool* pTmpRegIsZero, + bool reportUnwindData); // A simple struct to keep register pairs for prolog and epilog. 
struct RegPair @@ -423,13 +428,12 @@ class CodeGen final : public CodeGenInterface static int genGetSlotSizeForRegsInMask(regMaskTP regsMask); void genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset); - void genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset); + void genRestoreCalleeSavedRegisterGroup( + regMaskTP regsMask, regNumber baseReg, int spDelta, int spOffset, bool reportUnwindData); void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta); void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta); - void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); - #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool genInstrWithConstant(instruction ins, emitAttr attr, @@ -442,17 +446,14 @@ class CodeGen final : public CodeGenInterface void genStackPointerAdjustment(ssize_t spAdjustment, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData); void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset); - void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset); - void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); - -#else - void genPushCalleeSavedRegisters(); + void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, + regNumber baseReg, + int lowestCalleeSavedOffset, + bool reportUnwindData); #endif -#if defined(TARGET_AMD64) - void genOSRRecordTier0CalleeSavedRegistersAndFrame(); + void genOSRHandleTier0CalleeSavedRegistersAndFrame(); void genOSRSaveRemainingCalleeSavedRegisters(); -#endif // TARGET_AMD64 void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn); diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 2d2ab0026bd9b0..e28571af1629e2 100644 --- 
a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -1853,6 +1853,15 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) #endif // PROFILING_SUPPORTED +//------------------------------------------------------------------------ +// genOSRHandleTier0CalleeSavedRegistersAndFrame: +// Not called for arm without OSR support. +// +void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() +{ + unreached(); +} + //------------------------------------------------------------------------ // genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer. // diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 2b82d6a1095ec1..8880e8fe6b2ebc 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -132,7 +132,8 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) // add sp,sp,#remainingFrameSz JITDUMP(" alignmentAdjustment2=%d\n", alignmentAdjustment2); - genEpilogRestoreRegPair(REG_FP, REG_LR, alignmentAdjustment2, spAdjustment2, false, REG_IP1, nullptr); + genRestoreRegPair(REG_FP, REG_LR, REG_SPBASE, alignmentAdjustment2, spAdjustment2, false, REG_IP1, + nullptr, /* reportUnwindData */ true); } else { @@ -153,8 +154,8 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) JITDUMP(" remainingFrameSz=%d\n", remainingFrameSz); - genEpilogRestoreRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, false, - REG_IP1, nullptr); + genRestoreRegPair(REG_FP, REG_LR, REG_SPBASE, m_compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, + false, REG_IP1, nullptr, /* reportUnwindData */ true); } // Unlike frameType=1 or frameType=2 that restore SP at the end, @@ -488,6 +489,7 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, if (spDelta != 0) { assert(!useSaveNextPair); + if ((spOffset == 0) && (spDelta >= -512)) { // We can use pre-indexed addressing when the stack adjustment fits in the 
instruction. @@ -592,7 +594,7 @@ void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNum } //------------------------------------------------------------------------ -// genEpilogRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog instead of the prolog. +// genRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog or in the OSR prolog. // The stack pointer adjustment, if requested, is done after the register restore, using post-index addressing. // The caller must ensure that we can use the LDP instruction, and that spOffset will be in the legal range for that // instruction. @@ -600,7 +602,8 @@ void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNum // Arguments: // reg1 - First register of pair to restore. // reg2 - Second register of pair to restore. -// spOffset - The offset from SP to load reg1 (must be positive or zero). +// baseReg - Base register to load values from. +// spOffset - The offset from the base register to load reg1 (must be in the range [-512, 504]). // spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or // zero). // useSaveNextPair - True if the last prolog instruction was to save the previous register pair. This @@ -612,15 +615,17 @@ void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNum // Return Value: // None.
-void CodeGen::genEpilogRestoreRegPair(regNumber reg1, - regNumber reg2, - int spOffset, - int spDelta, - bool useSaveNextPair, - regNumber tmpReg, - bool* pTmpRegIsZero) +void CodeGen::genRestoreRegPair(regNumber reg1, + regNumber reg2, + regNumber baseReg, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero, + bool reportUnwindData) { - assert(spOffset >= 0); + assert((spOffset >= -512) && (spOffset <= 504)); assert(spDelta >= 0); assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both @@ -634,43 +639,53 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, { // Fold the SP change into this instruction. // ldp reg1, reg2, [SP], #spDelta - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_POST_INDEX); - m_compiler->unwindSaveRegPairPreindexed(reg1, reg2, -spDelta); + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, baseReg, spDelta, INS_OPTS_POST_INDEX); + + if (reportUnwindData) + { + m_compiler->unwindSaveRegPairPreindexed(reg1, reg2, -spDelta); + } } else // (spOffset != 0) || (spDelta > 504) { // Can't fold in the SP change; need to use a separate ADD instruction. 
// ldp reg1, reg2, [SP, #offset] - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); - m_compiler->unwindSaveRegPair(reg1, reg2, spOffset); + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, baseReg, spOffset); + if (reportUnwindData) + { + m_compiler->unwindSaveRegPair(reg1, reg2, spOffset); + } // generate add SP,SP,imm - genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ reportUnwindData); } } else { - GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); + GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, baseReg, spOffset); - if (TargetOS::IsUnix && m_compiler->generateCFIUnwindCodes()) + if (reportUnwindData) { - useSaveNextPair = false; - } + if (TargetOS::IsUnix && m_compiler->generateCFIUnwindCodes()) + { + useSaveNextPair = false; + } - if (useSaveNextPair) - { - m_compiler->unwindSaveNext(); - } - else - { - m_compiler->unwindSaveRegPair(reg1, reg2, spOffset); + if (useSaveNextPair) + { + m_compiler->unwindSaveNext(); + } + else + { + m_compiler->unwindSaveRegPair(reg1, reg2, spOffset); + } } } } //------------------------------------------------------------------------ -// genEpilogRestoreReg: The opposite of genPrologSaveReg(), run in the epilog instead of the prolog. +// genRestoreReg: The opposite of genPrologSaveReg(), run in the epilog or in the OSR prolog. // // Arguments: // reg1 - Register to restore. @@ -684,9 +699,14 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, // Return Value: // None.
-void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) +void CodeGen::genRestoreReg(regNumber reg1, + regNumber baseReg, + int spOffset, + int spDelta, + regNumber tmpReg, + bool* pTmpRegIsZero, + bool reportUnwindData) { - assert(spOffset >= 0); assert(spDelta >= 0); assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned @@ -696,24 +716,36 @@ void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, reg { // We can use post-index addressing. // ldr REG, [SP], #spDelta - GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, REG_SPBASE, spDelta, INS_OPTS_POST_INDEX); - m_compiler->unwindSaveRegPreindexed(reg1, -spDelta); + GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, baseReg, spDelta, INS_OPTS_POST_INDEX); + + if (reportUnwindData) + { + m_compiler->unwindSaveRegPreindexed(reg1, -spDelta); + } } else // (spOffset != 0) || (spDelta > 255) { // ldr reg1, [SP, #offset] - GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); - m_compiler->unwindSaveReg(reg1, spOffset); + GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, baseReg, spOffset); + + if (reportUnwindData) + { + m_compiler->unwindSaveReg(reg1, spOffset); + } // generate add SP,SP,imm - genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, reportUnwindData); } } else { // ldr reg1, [SP, #offset] - GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); - m_compiler->unwindSaveReg(reg1, spOffset); + GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, baseReg, spOffset); + + if (reportUnwindData) + { + m_compiler->unwindSaveReg(reg1, spOffset); + } } } @@ -977,9 +1009,10 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // Arguments: // regsMask - a mask of registers for epilog generation; // spDelta - if non-zero, the amount to add 
to SP after the last register restore (or together with it); -// spOffset - the offset from SP that is the beginning of the callee-saved register area; +// spOffset - the offset from SP that is the top of the callee-saved register area; // -void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset) +void CodeGen::genRestoreCalleeSavedRegisterGroup( + regMaskTP regsMask, regNumber baseReg, int spDelta, int spOffset, bool reportUnwindData) { const int slotSize = genGetSlotSizeForRegsInMask(regsMask); @@ -1005,18 +1038,19 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta if (genReverseAndPairCalleeSavedRegisters) { - genEpilogRestoreRegPair(regPair.reg2, regPair.reg1, spOffset, stackDelta, false, REG_IP1, nullptr); + genRestoreRegPair(regPair.reg2, regPair.reg1, baseReg, spOffset, stackDelta, false, REG_IP1, nullptr, + reportUnwindData); } else { - genEpilogRestoreRegPair(regPair.reg1, regPair.reg2, spOffset, stackDelta, regPair.useSaveNextPair, - REG_IP1, nullptr); + genRestoreRegPair(regPair.reg1, regPair.reg2, baseReg, spOffset, stackDelta, regPair.useSaveNextPair, + REG_IP1, nullptr, reportUnwindData); } } else { spOffset -= slotSize; - genEpilogRestoreReg(regPair.reg1, spOffset, stackDelta, REG_IP1, nullptr); + genRestoreReg(regPair.reg1, baseReg, spOffset, stackDelta, REG_IP1, nullptr, reportUnwindData); } } } @@ -1088,20 +1122,23 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in { int spFrameDelta = (maskRestoreRegsFloat != RBM_NONE || maskRestoreRegsInt != RBM_NONE) ? 0 : spDelta; spOffset -= 2 * REGSIZE_BYTES; - genEpilogRestoreRegPair(REG_FP, REG_LR, spOffset, spFrameDelta, false, REG_IP1, nullptr); + genRestoreRegPair(REG_FP, REG_LR, REG_SPBASE, spOffset, spFrameDelta, false, REG_IP1, nullptr, + /* reportUnwindData */ true); } if (maskRestoreRegsInt != RBM_NONE) { int spIntDelta = (maskRestoreRegsFloat != RBM_NONE) ? 
0 : spDelta; // should we delay the SP adjustment? - genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, spIntDelta, spOffset); + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, REG_SPBASE, spIntDelta, spOffset, + /* reportUnwindData */ true); spOffset -= genCountBits(maskRestoreRegsInt) * REGSIZE_BYTES; } if (maskRestoreRegsFloat != RBM_NONE) { // If there is any spDelta, it must be used here. - genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, spDelta, spOffset); + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, REG_SPBASE, spDelta, spOffset, + /* reportUnwindData */ true); // No need to update spOffset since it's not used after this. } } @@ -5549,6 +5586,90 @@ void CodeGen::genStoreLclTypeSimd12(GenTreeLclVarCommon* treeNode) #endif // FEATURE_SIMD +//----------------------------------------------------------------------------- +// genOSRHandleTier0CalleeSavedRegistersAndFrame: +// Handle the tier0 callee saves by restoring them from the original tier0 frame. +// Also report phantom unwind data for the stack allocated by the tier0 frame. +// +void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() +{ + assert(m_compiler->compGeneratingProlog); + assert(m_compiler->opts.IsOSR()); + assert(m_compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT); + + PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; + regMaskTP const tier0CalleeSaves((regMaskSmall)patchpointInfo->CalleeSaveRegisters()); + + JITDUMP("--OSR--- tier0 has already saved "); + JITDUMPEXEC(dspRegMask(tier0CalleeSaves)); + JITDUMP("\nEmitting restores\n"); + + // Note: the restore of LR relies on the tier0 method having been unhijacked when the OSR method prolog runs. + // This happens in the transition helper. If the helper is not used (e.g. because we directly jump into OSR) + // then hijacking tier0 is not supported -- this is similar to tailcalls so the situation can be recorded via + // SetHasTailCalls.
+ + regMaskTP restoreRegsFrame = tier0CalleeSaves & (RBM_FP | RBM_LR); + regMaskTP restoreRegsFloat = tier0CalleeSaves & RBM_ALLFLOAT; + regMaskTP restoreRegsInt = tier0CalleeSaves & ~restoreRegsFrame & ~restoreRegsFloat; + + regNumber baseReg; + int topOfCalleeSaves; + if (restoreRegsFrame != RBM_NONE) + { + // FP/LR was saved with the callee saves. It is always at the top. + // Restore rest of callee saves with the offset from FP. + baseReg = REG_FP; + topOfCalleeSaves = 0; + } + else + { + // FP/LR was not saved with the callee saves. Here we do not actually + // know the offset from FP to the callee saves, but we do know the + // offset from SP. + baseReg = REG_SP; + topOfCalleeSaves = patchpointInfo->TotalFrameSize(); + if (m_compiler->info.compIsVarArgs) + { + topOfCalleeSaves -= MAX_REG_ARG * REGSIZE_BYTES; + } + + if ((topOfCalleeSaves > 504) && ((restoreRegsInt != RBM_NONE) || (restoreRegsFloat != RBM_NONE))) + { + // Too far to encode ldp with sp directly. Compute top into another register. + // Note: not reporting unwind nops for this as we will pad below anyway. + genInstrWithConstant(INS_add, EA_PTRSIZE, REG_IP0, REG_SP, topOfCalleeSaves, REG_IP0, + /* inUnwindRegion */ false); + baseReg = REG_IP0; + topOfCalleeSaves = 0; + } + } + + if (restoreRegsInt != RBM_NONE) + { + genRestoreCalleeSavedRegisterGroup(restoreRegsInt, baseReg, 0, topOfCalleeSaves, /* reportUnwindData */ false); + topOfCalleeSaves -= genCountBits(restoreRegsInt) * REGSIZE_BYTES; + } + + if (restoreRegsFloat != RBM_NONE) + { + genRestoreCalleeSavedRegisterGroup(restoreRegsFloat, baseReg, 0, topOfCalleeSaves, + /* reportUnwindData */ false); + topOfCalleeSaves -= genCountBits(restoreRegsFloat) * REGSIZE_BYTES; + } + + // Regardless of frame type fp always points to the saved fp/lr for frame + // pointer chaining purposes, so restoring them is trivial. 
+ genRestoreRegPair(REG_FP, REG_LR, REG_FP, 0, 0, false, REG_IP1, nullptr, + /* reportUnwindData */ false); + + // Emit phantom unwind data for the tier0 frame. + m_compiler->unwindAllocStack(patchpointInfo->TotalFrameSize()); + // Emit nops to make the prolog 1:1 in unwind codes to instructions. This + // is needed for win-arm64. + m_compiler->unwindPadding(); +} + #ifdef PROFILING_SUPPORTED //----------------------------------------------------------------------------------- diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 9d03a501c2b88c..af6f625c6372b0 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -4278,16 +4278,12 @@ void CodeGen::genSIMDSplitReturn(GenTree* src, const ReturnTypeDesc* retTypeDesc //------------------------------------------------------------------------ // genPushCalleeSavedRegisters: Push any callee-saved registers we have used. // -// Arguments (arm64): +// Arguments: // initReg - A scratch register (that gets set to zero on some platforms). // pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'true' if this method sets initReg register to zero, // 'false' if initReg was set to a non-zero value, and left unchanged if initReg was not touched. 
// -#if defined(TARGET_ARM64) void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed) -#else -void CodeGen::genPushCalleeSavedRegisters() -#endif { assert(m_compiler->compGeneratingProlog); @@ -4776,6 +4772,7 @@ void CodeGen::genPushCalleeSavedRegisters() JITDUMP(" spAdjustment2=%d\n", spAdjustment2); genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed); + offset += spAdjustment2; // Now subtract off the #outsz (or the rest of the #outsz if it was unaligned, and the above "sub" @@ -4802,6 +4799,7 @@ void CodeGen::genPushCalleeSavedRegisters() { genPrologSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, initReg, pInitRegZeroed); + offset += remainingFrameSz; offsetSpToSavedFp = m_compiler->lvaOutgoingArgSpaceSize; diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index d01f10408d9bfe..e3e810308d7afd 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -4135,11 +4135,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, // initReg -- scratch register to use if needed // pInitRegZeroed -- [IN,OUT] if init reg is zero (on entry/exit) // -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) void CodeGen::genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed) -#else -void CodeGen::genEnregisterOSRArgsAndLocals() -#endif { assert(m_compiler->opts.IsOSR()); PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; @@ -5053,23 +5049,6 @@ void CodeGen::genFnProlog() genBeginFnProlog(); -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // For arm64 OSR, emit a "phantom prolog" to account for the actions taken - // in the tier0 frame that impact FP and SP on entry to the OSR method. 
- // - // x64 handles this differently; the phantom prolog unwind is emitted in - // genOSRRecordTier0CalleeSavedRegistersAndFrame. - // - if (m_compiler->opts.IsOSR()) - { - PatchpointInfo* patchpointInfo = m_compiler->info.compPatchpointInfo; - const int tier0FrameSize = patchpointInfo->TotalFrameSize(); - - // SP is tier0 method's SP. - m_compiler->unwindAllocStack(tier0FrameSize); - } -#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - #ifdef DEBUG if (m_compiler->compJitHaltMethod()) @@ -5320,11 +5299,7 @@ void CodeGen::genFnProlog() const bool isRoot = (m_compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT); -#ifdef TARGET_AMD64 - const bool isOSRx64Root = isRoot && m_compiler->opts.IsOSR(); -#else - const bool isOSRx64Root = false; -#endif // TARGET_AMD64 + const bool inheritsCalleeSaves = isRoot && m_compiler->opts.IsOSR(); regMaskTP tempMask = initRegs & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd; @@ -5348,37 +5323,16 @@ void CodeGen::genFnProlog() } } -#if defined(TARGET_AMD64) - // For x64 OSR root frames, we can't use any as of yet unsaved + // For OSR root frames, we can't use any as of yet unsaved // callee save as initReg, as we defer saving these until later in // the prolog, and we don't have normal arg regs. - if (isOSRx64Root) - { - initReg = REG_SCRATCH; // REG_EAX - } -#elif defined(TARGET_ARM64) - // For arm64 OSR root frames, we may need a scratch register for large - // offset addresses. Use a register that won't be allocated. - // - if (isRoot && m_compiler->opts.IsOSR()) - { - initReg = REG_IP1; - } -#elif defined(TARGET_LOONGARCH64) - // For LoongArch64 OSR root frames, we may need a scratch register for large - // offset addresses. Use a register that won't be allocated. 
- if (isRoot && m_compiler->opts.IsOSR()) + if (inheritsCalleeSaves) { initReg = REG_SCRATCH; - } -#elif defined(TARGET_RISCV64) - // For RISC-V64 OSR root frames, we may need a scratch register for large - // offset addresses. Use a register that won't be allocated. - if (isRoot && m_compiler->opts.IsOSR()) - { - initReg = REG_SCRATCH; // REG_T0 - } +#if defined(TARGET_ARM64) + initReg = REG_IP1; #endif + } #if defined(TARGET_AMD64) // If we are a varargs call, in order to set up the arguments correctly this @@ -5412,30 +5366,29 @@ void CodeGen::genFnProlog() } #endif // TARGET_ARM #else // TARGET_WASM - regNumber initReg = REG_NA; - bool initRegZeroed = false; - bool isOSRx64Root = false; + regNumber initReg = REG_NA; + bool initRegZeroed = false; + bool inheritsCalleeSaves = false; #endif // TARGET_WASM unsigned extraFrameSize = 0; -#ifdef TARGET_XARCH - -#ifdef TARGET_AMD64 - if (isOSRx64Root) + if (inheritsCalleeSaves) { // Account for the Tier0 callee saves // - genOSRRecordTier0CalleeSavedRegistersAndFrame(); + genOSRHandleTier0CalleeSavedRegistersAndFrame(); +#ifdef TARGET_AMD64 // We don't actually push any callee saves on the OSR frame, // but we still reserve space, so account for this when // allocating the local frame. // extraFrameSize = m_compiler->compCalleeRegsPushed * REGSIZE_BYTES; +#endif } -#endif // TARGET_AMD64 +#ifdef TARGET_XARCH if (doubleAlignOrFramePointerUsed()) { // OSR methods handle "saving" FP specially. @@ -5444,7 +5397,7 @@ void CodeGen::genFnProlog() // Tier0 method. The save we do here is just to set up a // proper RBP-based frame chain link. 
// - if (isOSRx64Root && isFramePointerUsed()) + if (inheritsCalleeSaves && isFramePointerUsed()) { GetEmitter()->emitIns_R_AR(INS_mov, EA_8BYTE, initReg, REG_FPBASE, 0); inst_RV(INS_push, initReg, TYP_REF); @@ -5460,9 +5413,10 @@ void CodeGen::genFnProlog() inst_RV(INS_push, REG_FPBASE, TYP_REF); m_compiler->unwindPush(REG_FPBASE); } -#ifndef TARGET_AMD64 // On AMD64, establish the frame pointer after the "sub rsp" +#ifdef TARGET_X86 + // On x86 establish frame pointer now. For x64 we establish it after the "sub rsp". genEstablishFramePointer(0, /*reportUnwindData*/ true); -#endif // !TARGET_AMD64 +#endif // TARGET_X86 #if DOUBLE_ALIGN if (m_compiler->genDoubleAlign()) @@ -5476,16 +5430,20 @@ void CodeGen::genFnProlog() } #endif // TARGET_XARCH -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - genPushCalleeSavedRegisters(initReg, &initRegZeroed); - -#else // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 + bool pushesCalleeSaves = true; +#ifdef TARGET_AMD64 + // For OSR x64 we need canonical epilogs (sequence of pops). Hence we do + // not push any register in the prolog, we rather store them in the area + // allocated by the tier0 method. For OSR on other platforms we have no + // such requirement, instead we restore tier0 saved callee saves from its + // area on entry and then run the prolog as normal. + pushesCalleeSaves = !inheritsCalleeSaves; +#endif - if (!isOSRx64Root) + if (pushesCalleeSaves) { - genPushCalleeSavedRegisters(); + genPushCalleeSavedRegisters(initReg, &initRegZeroed); } -#endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 #ifdef TARGET_ARM bool needToEstablishFP = false; @@ -5530,14 +5488,14 @@ void CodeGen::genFnProlog() } #endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 -#ifdef TARGET_AMD64 - // For x64 OSR we have to finish saving int callee saves. + // For x64 OSR we have to finish saving callee saves. 
// - if (isOSRx64Root) +#ifdef TARGET_AMD64 + if (inheritsCalleeSaves) { genOSRSaveRemainingCalleeSavedRegisters(); } -#endif // TARGET_AMD64 +#endif //------------------------------------------------------------------------- @@ -5667,12 +5625,7 @@ void CodeGen::genFnProlog() // we've set the live-in regs with values from the Tier0 frame. // // Otherwise we'll do some of these fetches twice. - -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) genEnregisterOSRArgsAndLocals(initReg, &initRegZeroed); -#else - genEnregisterOSRArgsAndLocals(); -#endif // OSR functions take no parameters in registers. Ensure no mappings // are present. assert((m_compiler->m_paramRegLocalMappings == nullptr) || m_compiler->m_paramRegLocalMappings->Empty()); diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 5bca27b0247552..b7e548b874b233 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -257,7 +257,9 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // // Arguments: // regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing. +// baseReg - Base register to use when loading values // lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. +// reportUnwindData - If true, report the change in unwind data. Otherwise, do not report it. // // Here's an example restore sequence: // ld.d s8,sp,#xxx @@ -273,7 +275,10 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // Return Value: // None. -void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset) +void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, + regNumber baseReg, + int lowestCalleeSavedOffset, + bool reportUnwindData) { // The FP and RA are not in RBM_CALLEE_SAVED. 
assert(!(regsToRestoreMask & (~RBM_CALLEE_SAVED))); @@ -294,8 +299,12 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in if (maskSaveRegs < 0) { highestCalleeSavedOffset -= REGSIZE_BYTES; - emit->emitIns_R_R_I(INS_fld_d, EA_8BYTE, (regNumber)regNum, REG_SP, highestCalleeSavedOffset); - m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); + emit->emitIns_R_R_I(INS_fld_d, EA_8BYTE, (regNumber)regNum, baseReg, highestCalleeSavedOffset); + + if (reportUnwindData) + { + m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); + } } maskSaveRegs <<= 1; regNum -= 1; @@ -309,8 +318,12 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in if (maskSaveRegs < 0) { highestCalleeSavedOffset -= REGSIZE_BYTES; - emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, (regNumber)regNum, REG_SP, highestCalleeSavedOffset); - m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); + emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, (regNumber)regNum, baseReg, highestCalleeSavedOffset); + + if (reportUnwindData) + { + m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); + } } maskSaveRegs <<= 1; regNum -= 1; @@ -319,6 +332,31 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in assert(highestCalleeSavedOffset >= 16); // the callee-saved regs always above ra/fp. } +//----------------------------------------------------------------------------- +// genOSRHandleTier0CalleeSavedRegistersAndFrame: +// Handle the tier0 callee saves by restoring them from the original tier0 frame. +// Also report phantom unwind data for the stack allocated by the tier0 frame.
+// +void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() +{ + assert(m_compiler->compGeneratingProlog); + assert(m_compiler->opts.IsOSR()); + assert(m_compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT); + + PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; + regMaskTP tier0CalleeSaves(patchpointInfo->CalleeSaveRegisters()); + + JITDUMP("--OSR--- tier0 has already saved "); + JITDUMPEXEC(dspRegMask(tier0CalleeSaves)); + JITDUMP("\nEmitting restores\n"); + + genRestoreCalleeSavedRegistersHelp(tier0CalleeSaves & ~(RBM_FP | RBM_RA), REG_FP, 16, /* reportUnwindData */ false); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_FP, 8); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_FP, 0); + + m_compiler->unwindAllocStack(patchpointInfo->TotalFrameSize()); +} + // clang-format off /***************************************************************************** * @@ -504,7 +542,7 @@ void CodeGen::genFuncletEpilog(BasicBlock* /* block */) FP_offset = FP_offset & 0xf; } - genRestoreCalleeSavedRegistersHelp(maskSaveRegs, FP_offset + 16); + genRestoreCalleeSavedRegistersHelp(maskSaveRegs, REG_SPBASE, FP_offset + 16, /* reportUnwindData */ true); GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); m_compiler->unwindSaveReg(REG_RA, FP_offset + 8); @@ -6839,7 +6877,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) } JITDUMP(" calleeSaveSPOffset=%d\n", FP_offset + 16); - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, FP_offset + 16); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, REG_SPBASE, FP_offset + 16, /* reportUnwindData */ true); emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); m_compiler->unwindSaveReg(REG_RA, FP_offset + 8); diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 86431cb877de10..6732e8459e018a 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ 
b/src/coreclr/jit/codegenriscv64.cpp @@ -245,7 +245,9 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // // Arguments: // regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing. +// baseReg - Base register to use when loading values // lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. +// reportUnwindData - If true, report the change in unwind data. Otherwise, do not report it. // // Here's an example restore sequence: // ld s11, #xxx(sp) @@ -263,7 +265,10 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // Return Value: // None. -void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset) +void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, + regNumber baseReg, + int lowestCalleeSavedOffset, + bool reportUnwindData) { // The FP and RA are not in RBM_CALLEE_SAVED. 
assert(!(regsToRestoreMask & (~RBM_CALLEE_SAVED))); @@ -284,8 +289,12 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in if (maskSaveRegs < 0) { highestCalleeSavedOffset -= REGSIZE_BYTES; - emit->emitIns_R_R_I(INS_fld, EA_8BYTE, (regNumber)regNum, REG_SP, highestCalleeSavedOffset); - m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); + emit->emitIns_R_R_I(INS_fld, EA_8BYTE, (regNumber)regNum, baseReg, highestCalleeSavedOffset); + + if (reportUnwindData) + { + m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); + } } maskSaveRegs <<= 1; regNum -= 1; @@ -299,8 +308,12 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in if (maskSaveRegs < 0) { highestCalleeSavedOffset -= REGSIZE_BYTES; - emit->emitIns_R_R_I(INS_ld, EA_8BYTE, (regNumber)regNum, REG_SP, highestCalleeSavedOffset); - m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); + emit->emitIns_R_R_I(INS_ld, EA_8BYTE, (regNumber)regNum, baseReg, highestCalleeSavedOffset); + + if (reportUnwindData) + { + m_compiler->unwindSaveReg((regNumber)regNum, highestCalleeSavedOffset); + } } maskSaveRegs <<= 1; regNum -= 1; @@ -309,6 +322,31 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in assert(highestCalleeSavedOffset >= 16); // the callee-saved regs always above ra/fp. } +//----------------------------------------------------------------------------- +// genOSRHandleTier0CalleeSavedRegistersAndFrame: +// Handle the tier0 callee saves by restoring them from the original tier0 frame. +// Also report phantom unwind data for the stack allocated by the tier0 frame.
+// +void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() +{ + assert(m_compiler->compGeneratingProlog); + assert(m_compiler->opts.IsOSR()); + assert(m_compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT); + + PatchpointInfo* const patchpointInfo = m_compiler->info.compPatchpointInfo; + regMaskTP const tier0CalleeSaves(patchpointInfo->CalleeSaveRegisters()); + + JITDUMP("--OSR--- tier0 has already saved "); + JITDUMPEXEC(dspRegMask(tier0CalleeSaves)); + JITDUMP("\nEmitting restores\n"); + + genRestoreCalleeSavedRegistersHelp(tier0CalleeSaves & ~(RBM_FP | RBM_RA), REG_FP, 16, /* reportUnwindData */ false); + GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_FP, 8); + GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_FP, 0); + + m_compiler->unwindAllocStack(patchpointInfo->TotalFrameSize()); +} + // clang-format off /***************************************************************************** * @@ -492,7 +530,7 @@ void CodeGen::genFuncletEpilog(BasicBlock* /* block */) FP_offset = FP_offset & 0xf; } - genRestoreCalleeSavedRegistersHelp(maskSaveRegs, FP_offset + 16); + genRestoreCalleeSavedRegistersHelp(maskSaveRegs, REG_SPBASE, FP_offset + 16, /* reportUnwindData */ true); GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); m_compiler->unwindSaveReg(REG_RA, FP_offset + 8); @@ -6539,7 +6577,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) } JITDUMP(" calleeSaveSPOffset=%d\n", FP_offset + 16); - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, FP_offset + 16); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, REG_SPBASE, FP_offset + 16, /* reportUnwindData */ true); emit->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); m_compiler->unwindSaveReg(REG_RA, FP_offset + 8); diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index 0ce8d19640e56c..a8c072e8cacafe 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ 
b/src/coreclr/jit/codegenwasm.cpp @@ -85,7 +85,7 @@ void CodeGen::genBeginFnProlog() //------------------------------------------------------------------------ // genPushCalleeSavedRegisters: no-op since we don't need to save anything. // -void CodeGen::genPushCalleeSavedRegisters() +void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed) { } @@ -137,11 +137,20 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni //------------------------------------------------------------------------ // genEnregisterOSRArgsAndLocals: enregister OSR args and locals. // -void CodeGen::genEnregisterOSRArgsAndLocals() +void CodeGen::genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed) { unreached(); // OSR not supported on WASM. } +//------------------------------------------------------------------------ +// genOSRHandleTier0CalleeSavedRegistersAndFrame: +// Not called for WASM without OSR support. +// +void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() +{ + unreached(); +} + //------------------------------------------------------------------------ // genHomeRegisterParams: place register arguments into their RA-assigned locations. // diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 892d3f2def304f..30f278f1f32215 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -9853,11 +9853,11 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) #ifdef TARGET_AMD64 //------------------------------------------------------------------------ -// genOSRRecordTier0CalleeSavedRegistersAndFrame: for OSR methods, record the +// genOSRHandleTier0CalleeSavedRegistersAndFrame: for OSR methods, record the // subset of callee saves already saved by the Tier0 method, and the frame // created by Tier0. 
// -void CodeGen::genOSRRecordTier0CalleeSavedRegistersAndFrame() +void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() { assert(m_compiler->compGeneratingProlog); assert(m_compiler->opts.IsOSR()); @@ -9999,18 +9999,29 @@ void CodeGen::genOSRSaveRemainingCalleeSavedRegisters() osrAdditionalIntCalleeSaves &= ~regBit; } } +#else + +//------------------------------------------------------------------------ +// genOSRHandleTier0CalleeSavedRegistersAndFrame: +// Not called for x86 without OSR support. +// +void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() +{ + unreached(); +} + #endif // TARGET_AMD64 //------------------------------------------------------------------------ // genPushCalleeSavedRegisters: Push any callee-saved registers we have used. // -void CodeGen::genPushCalleeSavedRegisters() +void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed) { assert(m_compiler->compGeneratingProlog); #if DEBUG // OSR root frames must handle this differently. See - // genOSRRecordTier0CalleeSavedRegisters() + // genOSRHandleTier0CalleeSavedRegistersAndFrame() // genOSRSaveRemainingCalleeSavedRegisters() // if (m_compiler->opts.IsOSR()) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 9461a56ff9d71f..b1542a133ec3fd 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5724,17 +5724,30 @@ void Compiler::generatePatchpointInfo() patchpointInfo->AsyncSynchronizationContextOffset()); } -#if defined(TARGET_AMD64) // Record callee save registers. - // Currently only needed for x64. 
// regMaskTP rsPushRegs = codeGen->regSet.rsGetModifiedCalleeSavedRegsMask(); rsPushRegs |= RBM_FPBASE; - patchpointInfo->SetCalleeSaveRegisters((uint64_t)rsPushRegs); +#if defined(TARGET_ARM64) + rsPushRegs |= RBM_LR; +#elif defined(TARGET_LOONGARCH64) + rsPushRegs |= RBM_RA; +#elif defined(TARGET_RISCV64) + rsPushRegs |= RBM_RA; +#endif + +#ifdef TARGET_ARM64 + // For arm64 we communicate whether fp/lr are stored with the callee saves in this mask. + if (!codeGen->IsSaveFpLrWithAllCalleeSavedRegisters()) + { + rsPushRegs &= ~(RBM_FP | RBM_LR); + } +#endif + + patchpointInfo->SetCalleeSaveRegisters((uint64_t)rsPushRegs.getLow()); JITDUMP("--OSR-- Tier0 callee saves: "); - JITDUMPEXEC(dspRegMask((regMaskTP)patchpointInfo->CalleeSaveRegisters())); + JITDUMPEXEC(dspRegMask(regMaskTP((regMaskSmall)patchpointInfo->CalleeSaveRegisters()))); JITDUMP("\n"); -#endif // Register this with the runtime. info.compCompHnd->setPatchpointInfo(patchpointInfo); diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 98d65c17cb2f43..fef4e1175a2235 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1756,11 +1756,10 @@ extern "C" void JIT_PatchpointWorkerWorkerWithPolicy(TransitionBlock * pTransiti SetSSP(pFrameContext, ssp - 8); } #endif // TARGET_WINDOWS - - pFrameContext->Rbp = currentFP; -#endif // TARGET_AMD64 +#endif // TARGET_AMD64 SetSP(pFrameContext, currentSP); + SetFP(pFrameContext, currentFP); // Note we can get here w/o triggering, if there is an existing OSR method and // we hit the patchpoint. 
diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index ffc5dd3f8b4bd9..fda3b8af014b87 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -11414,6 +11414,26 @@ void CEEJitInfo::setPatchpointInfo(PatchpointInfo* patchpointInfo) // We receive ownership of the array _ASSERTE(m_pPatchpointInfoFromJit == NULL); m_pPatchpointInfoFromJit = patchpointInfo; + +#if defined(_DEBUG) && defined(ALLOW_SXS_JIT) + if (m_jitFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_ALT_JIT)) + { + uint32_t ppiSize = patchpointInfo->PatchpointInfoSize(); + + AllocMemTracker am; + void* mem = am.Track(m_pMethodBeingCompiled->GetLoaderAllocator()->GetLowFrequencyHeap()->AllocMem(S_SIZE_T(ppiSize))); + PatchpointInfo *newPpi = new (mem) PatchpointInfo; + newPpi->Initialize(patchpointInfo->NumberOfLocals(), patchpointInfo->TotalFrameSize()); + newPpi->Copy(patchpointInfo); + + HRESULT hr = m_pMethodBeingCompiled->SetMethodDescAltJitPatchpointInfo(newPpi); + if (SUCCEEDED(hr)) + { + am.SuppressRelease(); + } + } +#endif + #else UNREACHABLE(); #endif @@ -11437,6 +11457,18 @@ PatchpointInfo* CEEJitInfo::getOSRInfo(unsigned* ilOffset) #ifdef FEATURE_ON_STACK_REPLACEMENT result = m_pPatchpointInfoFromRuntime; *ilOffset = m_ilOffset; + +#if defined(_DEBUG) && defined(ALLOW_SXS_JIT) + if (m_jitFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_ALT_JIT)) + { + PatchpointInfo* ppi = m_pMethodBeingCompiled->GetMethodDescAltJitPatchpointInfo(); + if (ppi != NULL) + { + result = ppi; + } + } +#endif + #endif EE_TO_JIT_TRANSITION(); diff --git a/src/coreclr/vm/method.cpp b/src/coreclr/vm/method.cpp index 48a41f486b8d01..79040c69c9e08b 100644 --- a/src/coreclr/vm/method.cpp +++ b/src/coreclr/vm/method.cpp @@ -274,6 +274,29 @@ void MethodDesc::SetMethodDescOptimizationTier(NativeCodeVersion::OptimizationTi _ASSERTE(m_codeData != NULL); VolatileStoreWithoutBarrier(&m_codeData->OptimizationTier, tier); } + +#if defined(_DEBUG) && defined(ALLOW_SXS_JIT) +HRESULT 
MethodDesc::SetMethodDescAltJitPatchpointInfo(PatchpointInfo* pInfo) +{ + WRAPPER_NO_CONTRACT; + + HRESULT hr; + IfFailRet(EnsureCodeDataExists(NULL)); + + _ASSERTE(m_codeData != NULL); + VolatileStoreWithoutBarrier(&m_codeData->AltJitPatchpointInfo, pInfo); + return S_OK; +} + +PatchpointInfo* MethodDesc::GetMethodDescAltJitPatchpointInfo() +{ + WRAPPER_NO_CONTRACT; + if (m_codeData == NULL) + return nullptr; + return VolatileLoadWithoutBarrier(&m_codeData->AltJitPatchpointInfo); +} +#endif // _DEBUG && ALLOW_SXS_JIT + #endif // FEATURE_CODE_VERSIONING #ifdef FEATURE_INTERPRETER diff --git a/src/coreclr/vm/method.hpp b/src/coreclr/vm/method.hpp index fa037eea4bd487..3138b7327bd612 100644 --- a/src/coreclr/vm/method.hpp +++ b/src/coreclr/vm/method.hpp @@ -262,6 +262,9 @@ struct MethodDescCodeData final #ifdef FEATURE_INTERPRETER CallStubHeader *CallStub; #endif // FEATURE_INTERPRETER +#if defined(_DEBUG) && defined(ALLOW_SXS_JIT) + PatchpointInfo *AltJitPatchpointInfo; +#endif // _DEBUG && ALLOW_SXS_JIT }; using PTR_MethodDescCodeData = DPTR(MethodDescCodeData); @@ -1990,6 +1993,11 @@ class MethodDesc #ifndef DACCESS_COMPILE HRESULT SetMethodDescVersionState(PTR_MethodDescVersioningState state); void SetMethodDescOptimizationTier(NativeCodeVersion::OptimizationTier tier); + +#if defined(_DEBUG) && defined(ALLOW_SXS_JIT) + HRESULT SetMethodDescAltJitPatchpointInfo(PatchpointInfo* pInfo); + PatchpointInfo* GetMethodDescAltJitPatchpointInfo(); +#endif #endif // !DACCESS_COMPILE PTR_MethodDescVersioningState GetMethodDescVersionState(); NativeCodeVersion::OptimizationTier GetMethodDescOptimizationTier(); From 396c868c675c526b90dca87ec4456c35bfad75fa Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 23 Apr 2026 10:44:19 +0100 Subject: [PATCH 21/65] Strip return address for correct signing in prolog of Tier0 OSR --- src/coreclr/jit/codegenarm64.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/coreclr/jit/codegenarm64.cpp 
b/src/coreclr/jit/codegenarm64.cpp index 8880e8fe6b2ebc..35ca7ff095caec 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5663,6 +5663,14 @@ void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() genRestoreRegPair(REG_FP, REG_LR, REG_FP, 0, 0, false, REG_IP1, nullptr, /* reportUnwindData */ false); + if (JitConfig.JitPacEnabled() != 0) + { + // The Tier0 frame saved LR signed with its own SP. Strip it here so the + // OSR prolog can re-sign LR with the OSR frame's SP via PACIASP. + // TODO-PAC: Authenticate the LR instead of stripping. Need to calculate correct SP for it. + GetEmitter()->emitIns(INS_xpaclri); + } + // Emit phantom unwind data for the tier0 frame. m_compiler->unwindAllocStack(patchpointInfo->TotalFrameSize()); // Emit nops to make the prolog 1:1 in unwind codes to instructions. This From f7d4548fdbf00ad67fd2b7564dc5728264484f17 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Mon, 27 Apr 2026 12:14:41 +0100 Subject: [PATCH 22/65] Avoid stripping while restoring Tier0 registers for OSR --- src/coreclr/jit/codegenarm64.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 35ca7ff095caec..880302b309f98f 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5665,10 +5665,14 @@ void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() if (JitConfig.JitPacEnabled() != 0) { - // The Tier0 frame saved LR signed with its own SP. Strip it here so the - // OSR prolog can re-sign LR with the OSR frame's SP via PACIASP. - // TODO-PAC: Authenticate the LR instead of stripping. Need to calculate correct SP for it. - GetEmitter()->emitIns(INS_xpaclri); + // Tier0 signed LR with the Tier0 caller SP before allocating its frame. + // Recreate that SP from the current Tier0 body SP so we can authenticate + // LR before the OSR prolog later re-signs it with the OSR SP via PACIASP. 
+ genInstrWithConstant(INS_add, EA_PTRSIZE, REG_IP0, REG_SPBASE, patchpointInfo->TotalFrameSize(), REG_IP0, + /* inUnwindRegion */ false); + GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_IP1, REG_LR, /* canSkip */ false); + GetEmitter()->emitIns(INS_autia1716); + GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_LR, REG_IP1, /* canSkip */ false); } // Emit phantom unwind data for the tier0 frame. From a1768bd7599083fc5ce9e804b05a87a6afe45430 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Tue, 28 Apr 2026 13:23:05 +0100 Subject: [PATCH 23/65] Keep encrypted LR saved in a slot while creating hijack frame --- src/coreclr/vm/arm64/asmhelpers.S | 15 ++++++++++++++- src/coreclr/vm/arm64/asmhelpers.asm | 12 ++++++++++++ src/coreclr/vm/threadsuspend.cpp | 20 ++++++++++++++------ 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 12c28267a689cb..c592d44596f15b 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -167,7 +167,7 @@ NESTED_END OnHijackTripThread, _TEXT LEAF_END PacStripPtr, _TEXT // void* PacSignPtr(void *, void *); -// This function sign the input pointer using zero as salt. +// This function signs the input pointer using SP as salt. // To avoid failing on non-PAC enabled machines, we use pacia1716 which signs lr explicitly. // Thus we need to move input in lr, sign it and then copy it back to the result register. .arch_extension pauth @@ -179,6 +179,19 @@ NESTED_END OnHijackTripThread, _TEXT ret LEAF_END PacSignPtr, _TEXT +// void* PacAuthPtr(void *, void *); +// This function authenticates the input signed-pointer using x1 as salt. +// To avoid failing on non-PAC enabled machines, we use autia1716 which authenticates x17 explicitly, with x16 as the modifier. +// Thus we need to move the input into x17 and the salt into x16, authenticate it and then copy x17 back to the result register.
+.arch_extension pauth + LEAF_ENTRY PacAuthPtr, _TEXT + mov x17, x0 + mov x16, x1 + autia1716 + mov x0, x17 + ret + LEAF_END PacAuthPtr, _TEXT + // ------------------------------------------------------------------ // Redirection Stub for GC in fully interruptible method //GenerateRedirectedHandledJITCaseStub GCThreadControl diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 29a09664b876c3..32381b97e4a28b 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -342,6 +342,18 @@ NoFloatingPointRetVal ret LEAF_END PacSignPtr +; void* PacAuthPtr(void *, void *); +; This function authenticates the input signed-pointer using x1 as salt. +; To avoid failing on non-PAC enabled machines, we use autia1716 which authenticates x17 explicitly, with x16 as the modifier. +; Thus we need to move the input into x17 and the salt into x16, authenticate it and then copy x17 back to the result register. + LEAF_ENTRY PacAuthPtr + mov x17, x0 + mov x16, x1 + DCD 0xD503219F ; autia1716 instruction in binary to avoid error while compiling with non-PAC enabled compilers + mov x0, x17 + ret + LEAF_END PacAuthPtr + ;; ------------------------------------------------------------------ ;; Redirection Stub for GC in fully interruptible method GenerateRedirectedHandledJITCaseStub GCThreadControl diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 25d26fc9b401d5..b7b133af1d860c 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -24,7 +24,7 @@ #if defined(TARGET_ARM64) extern "C" void* PacSignPtr(void* ptr, void* sp); -extern "C" void* PacStripPtr(void* ptr); +extern "C" void* PacAuthPtr(void* ptr, void* sp); #endif // TARGET_ARM64 bool ThreadSuspend::s_fSuspendRuntimeInProgress = false; @@ -4828,16 +4828,24 @@ void STDCALL OnHijackWorker(HijackArgs * pArgs) thread->ResetThreadState(Thread::TS_Hijacked); - // Fix up our caller's stack, so it can resume from the hijack correctly + // Keep the actual
resume address in the saved LR slot. HijackFrame needs a + // canonical managed PC for stackwalk/GC, but OnHijackTripThread will later + // return via the saved LR in HijackArgs. + pArgs->ReturnAddress = (size_t)thread->m_pvHJRetAddr; + #if defined(TARGET_ARM64) - pArgs->ReturnAddress = (size_t)PacStripPtr(thread->m_pvHJRetAddr); + void* hijackFrameReturnAddress = thread->m_pvHJRetAddr; + if (thread->m_pSpForPacSign != nullptr) + { + hijackFrameReturnAddress = PacAuthPtr(hijackFrameReturnAddress, thread->m_pSpForPacSign); + } #else - pArgs->ReturnAddress = (size_t)thread->m_pvHJRetAddr; -#endif //TARGET_ARM64 + void* hijackFrameReturnAddress = thread->m_pvHJRetAddr; +#endif // TARGET_ARM64 // Build a frame so that stack crawling can proceed from here back to where // we will resume execution. - HijackFrame frame((void *)pArgs->ReturnAddress, thread, pArgs); + HijackFrame frame(hijackFrameReturnAddress, thread, pArgs); #ifdef _DEBUG BOOL GCOnTransition = FALSE; From 1827095ada94263681a94a7b80b7f56826fe43c9 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Tue, 28 Apr 2026 13:22:10 +0100 Subject: [PATCH 24/65] Remove todo from libunwind to use strip instead of auth --- .../llvm-libunwind/src/DwarfInstructions.hpp | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/native/external/llvm-libunwind/src/DwarfInstructions.hpp b/src/native/external/llvm-libunwind/src/DwarfInstructions.hpp index b2dbedb692ad8e..1fc8baa8036b07 100644 --- a/src/native/external/llvm-libunwind/src/DwarfInstructions.hpp +++ b/src/native/external/llvm-libunwind/src/DwarfInstructions.hpp @@ -365,18 +365,7 @@ int DwarfInstructions::stepWithDwarf( if (cieInfo.addressesSignedWithBKey) asm("hint 0xe" : "+r"(x17) : "r"(x16)); // autib1716 else - { - //TODO-PAC: Restore the authentication with A key when signing with SP is in place. 
- //asm("hint 0xc" : "+r"(x17) : "r"(x16)); // autia1716 - __asm__ __volatile__ ("mov x9, lr\n\t" - "mov lr, %0\n\t" - "xpaclri\n\t" - "mov %0, lr\n\t" - "mov lr, x9" - : "+r"(x17) - : - : "x9", "lr"); // strip PAC - } + asm("hint 0xc" : "+r"(x17) : "r"(x16)); // autia1716 } returnAddress = x17; #endif From b43e5e74a55d6597b1f4315156acf338a9ec5171 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Wed, 29 Apr 2026 11:20:32 +0100 Subject: [PATCH 25/65] Use auth instead of strip in windows unwinder --- src/coreclr/unwinder/arm64/unwinder.cpp | 28 ++++++++++++++----------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/coreclr/unwinder/arm64/unwinder.cpp b/src/coreclr/unwinder/arm64/unwinder.cpp index 68433f05d342cf..8b6f49e2ba09b7 100644 --- a/src/coreclr/unwinder/arm64/unwinder.cpp +++ b/src/coreclr/unwinder/arm64/unwinder.cpp @@ -32,7 +32,7 @@ #endif #if !defined(DACCESS_COMPILE) && !defined(FEATURE_CDAC_UNWINDER) -extern "C" void* PacStripPtr(void* ptr); +extern "C" void* PacAuthPtr(void* ptr, void* sp); #endif // !defined(DACCESS_COMPILE) && !defined(FEATURE_CDAC_UNWINDER) #ifdef HOST_UNIX @@ -258,22 +258,24 @@ do { // Macros for stripping pointer authentication (PAC) bits. #if !defined(DACCESS_COMPILE) && !defined(FEATURE_CDAC_UNWINDER) -#define STRIP_PAC(pointer) RtlStripPacOnline(pointer) +#define HANDLE_PAC(pointer, sp) RtlStripPacOnline(pointer, sp) FORCEINLINE -VOID RtlStripPacOnline(_Inout_ PULONG64 Pointer) +VOID RtlStripPacOnline(_Inout_ PULONG64 Pointer, _In_ ULONG64 Sp) /*++ Routine Description: - This routine strips the ARM64 Pointer Authentication Code (PAC) from a - pointer using the ARM64-native xpaci intrinsic directly. Hence this should - only be called when stripping a pointer at runtime (not debugger) + This routine authenticates an ARM64 pointer signed with PACIASP + using the supplied stack pointer as the modifier. Hence this should only + be called when authenticating a pointer at runtime (not debugger).
Arguments: - Pointer - Supplies a pointer to the pointer whose PAC will be stripped. + Pointer - Supplies a pointer to the pointer whose PAC will be authenticated. + + Sp - Supplies the stack pointer value that was used as the PAC modifier. Return Value: @@ -282,17 +284,18 @@ Return Value: --*/ { - *Pointer = (ULONG64)PacStripPtr((void *) (*Pointer)); + *Pointer = (ULONG64)PacAuthPtr((void *)(*Pointer), (void *)Sp); } #else -#define STRIP_PAC(pointer) RtlStripPacManual(pointer) +#define HANDLE_PAC(pointer, sp) RtlStripPacManual(pointer, sp) FORCEINLINE VOID RtlStripPacManual( - _Inout_ PULONG64 Pointer -) + _Inout_ PULONG64 Pointer, + _In_ ULONG64 Sp + ) /*++ Routine Description: @@ -315,6 +318,7 @@ Return Value: --*/ { + UNREFERENCED_PARAMETER(Sp); *Pointer &= 0x0000FFFFFFFFFFFF; return; } @@ -2403,7 +2407,7 @@ Return Value: } // TODO-PAC: Authenticate instead of stripping the return address. - STRIP_PAC(&ContextRecord->Lr); + HANDLE_PAC(&ContextRecord->Lr, ContextRecord->Sp); // // TODO: Implement support for UnwindFlags RTL_VIRTUAL_UNWIND2_VALIDATE_PAC. 
From e5d5ca38856f51de3f3d9983a43d56481b791c29 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 30 Apr 2026 08:37:03 +0100 Subject: [PATCH 26/65] Keep signed return address for the hijackframe and authenticate it on demand --- src/coreclr/vm/arm64/stubs.cpp | 2 +- src/coreclr/vm/frames.h | 11 +++++++- src/coreclr/vm/threadsuspend.cpp | 45 +++++++++++++++++++++----------- 3 files changed, 41 insertions(+), 17 deletions(-) diff --git a/src/coreclr/vm/arm64/stubs.cpp b/src/coreclr/vm/arm64/stubs.cpp index 89e9c3f8b45727..60298085c77d2a 100644 --- a/src/coreclr/vm/arm64/stubs.cpp +++ b/src/coreclr/vm/arm64/stubs.cpp @@ -474,7 +474,7 @@ void HijackFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats pRD->IsCallerContextValid = FALSE; pRD->IsCallerSPValid = FALSE; - pRD->pCurrentContext->Pc = m_ReturnAddress; + pRD->pCurrentContext->Pc = GetReturnAddress(); size_t s = sizeof(struct HijackArgs); _ASSERTE(s%8 == 0); // HijackArgs contains register values and hence will be a multiple of 8 // stack must be multiple of 16. So if s is not multiple of 16 then there must be padding of 8 bytes diff --git a/src/coreclr/vm/frames.h b/src/coreclr/vm/frames.h index f3fccab5615efa..9775163f63e91d 100644 --- a/src/coreclr/vm/frames.h +++ b/src/coreclr/vm/frames.h @@ -1299,6 +1299,8 @@ class HijackFrame : public Frame m_ReturnAddress); } + PCODE GetReturnAddress_Impl(); + BOOL NeedsUpdateRegDisplay_Impl() { LIMITED_METHOD_CONTRACT; @@ -1326,11 +1328,18 @@ class HijackFrame : public Frame // HijackFrames are created by trip functions. See OnHijackTripThread() // They are real C++ objects on the stack. // So, it's a public function -- but that doesn't mean you should make some. 
- HijackFrame(LPVOID returnAddress, Thread *thread, HijackArgs *args); + HijackFrame(LPVOID returnAddress, Thread *thread, HijackArgs *args +#if defined(TARGET_ARM64) + , LPVOID spForPacSign +#endif + ); protected: TADDR m_ReturnAddress; +#if defined(TARGET_ARM64) + TADDR m_SpForPacSign; +#endif PTR_Thread m_Thread; DPTR(HijackArgs) m_Args; diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index b7b133af1d860c..70d697a25e785e 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -4795,9 +4795,30 @@ StackWalkAction SWCB_GetExecutionState(CrawlFrame *pCF, VOID *pData) return action; } -HijackFrame::HijackFrame(LPVOID returnAddress, Thread *thread, HijackArgs *args) +PCODE HijackFrame::GetReturnAddress_Impl() +{ + LIMITED_METHOD_DAC_CONTRACT; + +#if defined(TARGET_ARM64) && !defined(DACCESS_COMPILE) + if ((CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_JitPacEnabled) == 1) && (m_SpForPacSign != 0)) + { + return (PCODE)PacAuthPtr((void*)m_ReturnAddress, (void*)m_SpForPacSign); + } +#endif + + return (PCODE)m_ReturnAddress; +} + +HijackFrame::HijackFrame(LPVOID returnAddress, Thread *thread, HijackArgs *args +#if defined(TARGET_ARM64) + , LPVOID spForPacSign +#endif + ) : Frame(FrameIdentifier::HijackFrame), m_ReturnAddress((TADDR)returnAddress), +#if defined(TARGET_ARM64) + m_SpForPacSign((TADDR)spForPacSign), +#endif m_Thread(thread), m_Args(args) { @@ -4828,24 +4849,18 @@ void STDCALL OnHijackWorker(HijackArgs * pArgs) thread->ResetThreadState(Thread::TS_Hijacked); - // Keep the actual resume address in the saved LR slot. HijackFrame needs a - // canonical managed PC for stackwalk/GC, but OnHijackTripThread will later - // return via the saved LR in HijackArgs. + // Keep the actual resume address in the saved LR slot. HijackFrame + // authenticates the return address on demand for stackwalk/GC, but + // OnHijackTripThread will later return via the saved LR in HijackArgs. 
pArgs->ReturnAddress = (size_t)thread->m_pvHJRetAddr; -#if defined(TARGET_ARM64) - void* hijackFrameReturnAddress = thread->m_pvHJRetAddr; - if (thread->m_pSpForPacSign != nullptr) - { - hijackFrameReturnAddress = PacAuthPtr(hijackFrameReturnAddress, thread->m_pSpForPacSign); - } -#else - void* hijackFrameReturnAddress = thread->m_pvHJRetAddr; -#endif // TARGET_ARM64 - // Build a frame so that stack crawling can proceed from here back to where // we will resume execution. - HijackFrame frame(hijackFrameReturnAddress, thread, pArgs); + HijackFrame frame(thread->m_pvHJRetAddr, thread, pArgs +#if defined(TARGET_ARM64) + , thread->m_pSpForPacSign +#endif + ); #ifdef _DEBUG BOOL GCOnTransition = FALSE; From 067c38fa196d171af6565956fa640306e5191c60 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 30 Apr 2026 13:15:13 +0100 Subject: [PATCH 27/65] Fix build issues on macos --- src/coreclr/vm/arm64/cgencpu.h | 4 ++++ src/coreclr/vm/frames.h | 14 +++++++++++++- src/coreclr/vm/threadsuspend.cpp | 14 -------------- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index 281bdcfbdfa2a1..480c416f20873b 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -45,6 +45,10 @@ class ComCallMethodDesc; extern PCODE GetPreStubEntryPoint(); +#ifndef DACCESS_COMPILE +extern "C" void* PacAuthPtr(void* ptr, void* sp); +#endif + #define STACK_ALIGN_SIZE 16 #define JUMP_ALLOCATE_SIZE 16 // # bytes to allocate for a jump instruction diff --git a/src/coreclr/vm/frames.h b/src/coreclr/vm/frames.h index 9775163f63e91d..300ed62dd18359 100644 --- a/src/coreclr/vm/frames.h +++ b/src/coreclr/vm/frames.h @@ -1299,7 +1299,19 @@ class HijackFrame : public Frame m_ReturnAddress); } - PCODE GetReturnAddress_Impl(); + PCODE GetReturnAddress_Impl() + { + LIMITED_METHOD_DAC_CONTRACT; + +#if defined(TARGET_ARM64) && !defined(DACCESS_COMPILE) + if 
((CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_JitPacEnabled) == 1) && (m_SpForPacSign != 0)) + { + return (PCODE)PacAuthPtr((void*)m_ReturnAddress, (void*)m_SpForPacSign); + } +#endif + + return (PCODE)m_ReturnAddress; + } BOOL NeedsUpdateRegDisplay_Impl() { diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 70d697a25e785e..d0db1b8ab10114 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -4795,20 +4795,6 @@ StackWalkAction SWCB_GetExecutionState(CrawlFrame *pCF, VOID *pData) return action; } -PCODE HijackFrame::GetReturnAddress_Impl() -{ - LIMITED_METHOD_DAC_CONTRACT; - -#if defined(TARGET_ARM64) && !defined(DACCESS_COMPILE) - if ((CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_JitPacEnabled) == 1) && (m_SpForPacSign != 0)) - { - return (PCODE)PacAuthPtr((void*)m_ReturnAddress, (void*)m_SpForPacSign); - } -#endif - - return (PCODE)m_ReturnAddress; -} - HijackFrame::HijackFrame(LPVOID returnAddress, Thread *thread, HijackArgs *args #if defined(TARGET_ARM64) , LPVOID spForPacSign From e1a3695cdfa4df7b78aa040b04fac8211a443e86 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 30 Apr 2026 17:23:59 +0100 Subject: [PATCH 28/65] Handle phantom unwind codes correctly --- .../nativeaot/Runtime/windows/CoffNativeCodeManager.cpp | 2 +- src/coreclr/vm/excep.cpp | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp index 04bd83ea1f9d42..274006f2f1b2e9 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -839,7 +839,7 @@ static bool HasPacInUnwindInfo(PTR_VOID pUnwindDataBlob, size_t unwindDataBlobSi while (UnwindCodePtr < UnwindCodesEndPtr) { uint8_t CurCode = *UnwindCodePtr; - if ((CurCode & 0xfe) == 0xe4) // The last unwind code + if (CurCode == 0xe4) 
// The last unwind code { break; } diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index f39cc3e25441ec..21d6afd4811034 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6429,7 +6429,7 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re for (ULONG_PTR unwindOpPtr = UnwindCodePtr; unwindOpPtr < UnwindCodesEndPtr;) { BYTE curCode = *(BYTE*)unwindOpPtr; - if ((curCode & 0xFE) == 0xE4) // end, end_c + if (curCode == 0xE4) // end { break; } @@ -6449,7 +6449,7 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re for (ULONG_PTR unwindOpPtr = UnwindCodePtr; unwindOpPtr < UnwindCodesEndPtr;) { BYTE curCode = *(BYTE*)unwindOpPtr; - if ((curCode & 0xFE) == 0xE4) // end, end_c + if (curCode == 0xE4) // end { break; } @@ -6599,6 +6599,11 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re continue; } + if (CurCode == 0xE5) // end_c + { + continue; + } + if (CurCode == 0xE6) // save_next { continue; From 146da5e09859f94cf0bd0d5633d63e821db27d76 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Tue, 5 May 2026 13:18:47 +0100 Subject: [PATCH 29/65] Address review comments --- src/coreclr/debug/ee/controller.cpp | 6 ------ src/coreclr/inc/gcinfodecoder.h | 8 +------- src/coreclr/jit/codegenarm64.cpp | 2 -- src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp | 4 ++-- src/coreclr/nativeaot/Runtime/thread.cpp | 2 +- src/coreclr/unwinder/arm64/unwinder.cpp | 8 ++++---- src/coreclr/vm/arm64/cgencpu.h | 1 - src/coreclr/vm/excep.cpp | 4 +++- src/coreclr/vm/tailcallhelp.cpp | 3 ++- 9 files changed, 13 insertions(+), 25 deletions(-) diff --git a/src/coreclr/debug/ee/controller.cpp b/src/coreclr/debug/ee/controller.cpp index baa191a34e4b1f..2b122cddb7fa24 100644 --- a/src/coreclr/debug/ee/controller.cpp +++ b/src/coreclr/debug/ee/controller.cpp @@ -6202,12 +6202,6 @@ static bool IsTailCall(const BYTE * ip, ControllerStackInfo* info, TailCallFunct 
TailCallTls* tls = GetThread()->GetTailCallTls(); LPVOID tailCallAwareRetAddr = tls->GetFrame()->TailCallAwareReturnAddress; -#if defined(TARGET_ARM64) - //TODO-PAC: Authenticate instead of stripping the return addresses. - retAddr = PacStripPtr(retAddr); - tailCallAwareRetAddr = PacStripPtr(tailCallAwareRetAddr); -#endif // TARGET_ARM64 - LOG((LF_CORDB,LL_INFO1000, "ITCTR: ret addr is %p, tailcall aware ret addr is %p\n", retAddr, tailCallAwareRetAddr)); diff --git a/src/coreclr/inc/gcinfodecoder.h b/src/coreclr/inc/gcinfodecoder.h index ba9de788c29f52..69a45d6d349710 100644 --- a/src/coreclr/inc/gcinfodecoder.h +++ b/src/coreclr/inc/gcinfodecoder.h @@ -76,10 +76,6 @@ typedef void * OBJECTREF; #ifndef __cgencpu_h__ -#if defined(TARGET_ARM64) -extern "C" void* PacStripPtr(void* ptr); -#endif // TARGET_ARM64 - inline void SetIP(T_CONTEXT* context, PCODE rip) { _ASSERTE(!"don't call this"); @@ -109,8 +105,7 @@ inline PCODE GetIP(T_CONTEXT* context) #elif defined(TARGET_ARM) return (PCODE)context->Pc; #elif defined(TARGET_ARM64) - //TODO-PAC: Authenticate instead of stripping the return address. - return (PCODE) PacStripPtr((void *)context->Pc); + return (PCODE)context->Pc; #elif defined(TARGET_LOONGARCH64) return (PCODE)context->Pc; #elif defined(TARGET_RISCV64) @@ -803,4 +798,3 @@ typedef TGcInfoDecoder InterpreterGcInfoDecoder; #endif // _GC_INFO_DECODER_ - diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 67063b537c36de..2143aa7fdeb0bc 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -493,10 +493,8 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, if ((spOffset == 0) && (spDelta >= -512)) { // We can use pre-indexed addressing when the stack adjustment fits in the instruction. - // Generate: // stp REG, REG + 1, [SP, #spDelta]! // 64-bit STP offset range: -512 to 504, multiple of 8. 
- assert(reg1 != REG_LR); GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_PRE_INDEX); m_compiler->unwindSaveRegPairPreindexed(reg1, reg2, spDelta); needToSaveRegs = false; diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index e023bbf05172f6..a260387efa7024 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -1859,7 +1859,7 @@ void StackFrameIterator::NextInternal() ASSERT(!m_pThread->IsHijacked()); #if defined(TARGET_ARM64) - // TODO-PAC: Authenticate instead of stripping the return address. + // We strip the PC here as it's not being used to branch execution to. SetControlPC(PacStripPtr(dac_cast(PCODEToPINSTR(m_RegDisplay.GetIP())))); #else SetControlPC(dac_cast(PCODEToPINSTR(m_RegDisplay.GetIP()))); @@ -2199,7 +2199,7 @@ void StackFrameIterator::CalculateCurrentMethodState() } #if defined(TARGET_ARM64) - //TODO-PAC: Authenticate instead of stripping the return addresses. + // We strip the PC here as it's not being used to branch execution to. m_ControlPC = PacStripPtr(m_ControlPC); #endif // TARGET_ARM64 diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index 4f2db0358ac578..ad3d5d055fab5e 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -823,7 +823,7 @@ void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, Hijack ASSERT(pvRetAddr != NULL); #if defined(TARGET_ARM64) - //TODO-PAC: Authenticate instead of stripping the return addresses. + // We strip the PC here as it's not being used to branch execution to. 
ASSERT(StackFrameIterator::IsValidReturnAddress(PacStripPtr(pvRetAddr))); #else ASSERT(StackFrameIterator::IsValidReturnAddress(pvRetAddr)); diff --git a/src/coreclr/unwinder/arm64/unwinder.cpp b/src/coreclr/unwinder/arm64/unwinder.cpp index 8b6f49e2ba09b7..92987aecb6ab11 100644 --- a/src/coreclr/unwinder/arm64/unwinder.cpp +++ b/src/coreclr/unwinder/arm64/unwinder.cpp @@ -31,9 +31,9 @@ #define FIELD_OFFSET(type, field) ((LONG)__builtin_offsetof(type, field)) #endif -#if !defined(DACCESS_COMPILE) && !defined(FEATURE_CDAC_UNWINDER) +#if !defined(DACCESS_COMPILE) extern "C" void* PacAuthPtr(void* ptr, void* sp); -#endif // !defined(DACCESS_COMPILE) && !defined(FEATURE_CDAC_UNWINDER) +#endif // !defined(DACCESS_COMPILE) #ifdef HOST_UNIX #define RtlZeroMemory ZeroMemory @@ -258,10 +258,10 @@ do { // Macros for stripping pointer authentication (PAC) bits. #if !defined(DACCESS_COMPILE) && !defined(FEATURE_CDAC_UNWINDER) -#define HANDLE_PAC(pointer, sp) RtlStripPacOnline(pointer, sp) +#define HANDLE_PAC(pointer, sp) RtlHandlePacOnline(pointer, sp) FORCEINLINE -VOID RtlStripPacOnline(_Inout_ PULONG64 Pointer, _In_ ULONG64 Sp) +VOID RtlHandlePacOnline(_Inout_ PULONG64 Pointer, _In_ ULONG64 Sp) /*++ diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index 480c416f20873b..802fcce7af29a7 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -212,7 +212,6 @@ typedef struct _PROFILE_PLATFORM_SPECIFIC_DATA inline PCODE GetIP(const T_CONTEXT * context) { LIMITED_METHOD_DAC_CONTRACT; - //TODO-PAC: Strip/Authenticate while populating the context. 
return (PCODE) context->Pc; } diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 870d1adfa1244e..1d6482f8f5ab3e 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6323,7 +6323,9 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re *pSpForPacSign = 0; - // In prolog or epilog while the current frame is still being established or torn down we cannot retrieve correct SP reliably. + // In prolog or epilog while the current frame is still being established or torn down + // retrieving correct SP is complex. We conservatively bail-out in this case. + // TODO-PAC: Explore opportunities to retrieve SP while in prolog/epilog. if (IsIPInProlog(pCodeInfo)) { return false; diff --git a/src/coreclr/vm/tailcallhelp.cpp b/src/coreclr/vm/tailcallhelp.cpp index fa8ea22a9b4ad6..92efd671e2a6d0 100644 --- a/src/coreclr/vm/tailcallhelp.cpp +++ b/src/coreclr/vm/tailcallhelp.cpp @@ -45,7 +45,8 @@ FCIMPL2(void*, TailCallHelp::GetTailCallInfo, void** retAddrSlot, void** retAddr void* retAddrFromSlot = thread->GetReturnAddress(retAddrSlot); #if defined(TARGET_ARM64) - //TODO-PAC: Authenticate instead of stripping the return address. + // We strip the return address here as it's only used for comparison and + // not being used to branch execution to. 
retAddrFromSlot = PacStripPtr(retAddrFromSlot); #endif // TARGET_ARM64 *retAddr = retAddrFromSlot; From ebad54507c73efa0aa2bffdbe6d04974e55360c0 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Tue, 5 May 2026 13:32:07 +0100 Subject: [PATCH 30/65] Use xpaci instead of xpaclri for stripping --- src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S | 13 ++++--------- src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm | 11 +++-------- src/coreclr/vm/arm64/asmhelpers.S | 10 ++-------- src/coreclr/vm/arm64/asmhelpers.asm | 11 +++-------- 4 files changed, 12 insertions(+), 33 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S index b0c1e8da846c6f..20c2324974c005 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S +++ b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S @@ -5,16 +5,11 @@ #include "AsmOffsets.inc" // void* PacStripPtr(void *); -// This function strips the pointer of PAC info that is passed as an agrument. -// To avoid failing on non-PAC enabled machines, we use xpaclri (instead of xpaci) which strips lr explicitly. -// Thus we move need to move input in lr, strip it and copy it back to the result register. +// This function strips the pointer of PAC info that is passed as an argument. 
.arch_extension pauth LEAF_ENTRY PacStripPtr, _TEXT - mov x9, lr - mov lr, x0 - xpaclri - mov x0, lr - ret x9 + xpaci x0 + ret LEAF_END PacStripPtr, _TEXT // void* PacSignPtr(void *, void *); @@ -27,4 +22,4 @@ pacia1716 mov x0, x17 ret - LEAF_END PacSignPtr, _TEXT \ No newline at end of file + LEAF_END PacSignPtr, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm index fae95d519db4d4..30a32ea94d0117 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm @@ -6,15 +6,10 @@ TEXTAREA ; void* PacStripPtr(void *); -; This function strips the pointer of PAC info that is passed as an agrument. -; To avoid failing on non-PAC enabled machines, we use xpaclri (instead of xpaci) which strips lr explicitly. -; Thus we move need to move input in lr, strip it and copy it back to the result register. +; This function strips the pointer of PAC info that is passed as an argument. LEAF_ENTRY PacStripPtr - mov x9, lr - mov lr, x0 - DCD 0xD50320FF ; xpaclri instruction in binary to avoid error while compiling with non-PAC enabled compilers - mov x0, lr - ret x9 + DCD 0xDAC143E0 ; xpaci x0 instruction in binary to avoid requiring PAC-enabled assemblers + ret LEAF_END PacStripPtr ; void* PacSignPtr(void *, void *); diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index c592d44596f15b..9a1b8a23b51928 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -155,15 +155,10 @@ NESTED_END OnHijackTripThread, _TEXT // void* PacStripPtr(void *); // This function strips the pointer of PAC info that is passed as an argument. -// To avoid failing on non-PAC enabled machines, we use xpaclri (instead of xpaci) which strips lr explicitly. -// Thus we move need to move input in lr, strip it and copy it back to the result register. 
.arch_extension pauth LEAF_ENTRY PacStripPtr, _TEXT - mov x9, lr - mov lr, x0 - xpaclri - mov x0, lr - ret x9 + xpaci x0 + ret LEAF_END PacStripPtr, _TEXT // void* PacSignPtr(void *, void *); @@ -3552,4 +3547,3 @@ NESTED_ENTRY IL_Rethrow, _TEXT, NoHandler // Should never return brk #0 NESTED_END IL_Rethrow, _TEXT - diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 32381b97e4a28b..2d0868976b4912 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -319,15 +319,10 @@ NoFloatingPointRetVal #endif ; FEATURE_HIJACK ; void* PacStripPtr(void *); -; This function strips the pointer of PAC info that is passed as an agrument. -; To avoid failing on non-PAC enabled machines, we use xpaclri (instead of xpaci) which strips lr explicitly. -; Thus we move need to move input in lr, strip it and copy it back to the result register. +; This function strips the pointer of PAC info that is passed as an argument. LEAF_ENTRY PacStripPtr - mov x9, lr - mov lr, x0 - DCD 0xD50320FF ; xpaclri instruction in binary to avoid error while compiling with non-PAC enabled compilers - mov x0, lr - ret x9 + DCD 0xDAC143E0 ; xpaci x0 instruction in binary to avoid requiring PAC-enabled assemblers + ret LEAF_END PacStripPtr ; void* PacSignPtr(void *, void *); From 89a9ded6a86015fa0504f6cbbe911b50c240abca Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Tue, 5 May 2026 21:34:13 +0100 Subject: [PATCH 31/65] Incorporate review comment --- src/coreclr/inc/clrconfigvalues.h | 1 - src/coreclr/jit/codegenarm64.cpp | 2 ++ src/coreclr/unwinder/arm64/unwinder.cpp | 5 ++--- src/coreclr/vm/frames.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 0a0ee1e2c3208f..c9dd7c485c99c0 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -710,7 +710,6 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Sm4, 
W("EnableArm64Sm RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64SveAes, W("EnableArm64SveAes"), 1, "Allows Arm64 SveAes+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64SveSha3, W("EnableArm64SveSha3"), 1, "Allows Arm64 SveSha3+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64SveSm4, W("EnableArm64SveSm4"), 1, "Allows Arm64 SveSm4+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_JitPacEnabled, W("JitPacEnabled"), 1, "Allows Arm64 Pointer Authentication (PAC) to be disabled") #elif defined(TARGET_RISCV64) RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableRiscV64Zba, W("EnableRiscV64Zba"), 1, "Allows RiscV64 Zba hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableRiscV64Zbb, W("EnableRiscV64Zbb"), 1, "Allows RiscV64 Zbb hardware intrinsics to be disabled") diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 2143aa7fdeb0bc..e1b961dccc7bd3 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -1602,6 +1602,7 @@ void CodeGen::genFuncletEpilog(BasicBlock* /* block */) { GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); m_compiler->unwindSaveRegPair(REG_FP, REG_LR, 0); + genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, REG_SCRATCH, nullptr, /* reportUnwindData */ true); } else @@ -1632,6 +1633,7 @@ void CodeGen::genFuncletEpilog(BasicBlock* /* block */) else if (genFuncletInfo.fiFrameType == 3) { // With OSR we may see large values for fiSpDelta1 + // if (m_compiler->opts.IsOSR()) { GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0); diff --git a/src/coreclr/unwinder/arm64/unwinder.cpp b/src/coreclr/unwinder/arm64/unwinder.cpp index 92987aecb6ab11..f3c398c123af66 100644 --- a/src/coreclr/unwinder/arm64/unwinder.cpp +++ b/src/coreclr/unwinder/arm64/unwinder.cpp @@ -256,7 +256,7 @@ do { #endif // !defined(DEBUGGER_UNWIND) // Macros for 
stripping pointer authentication (PAC) bits. -#if !defined(DACCESS_COMPILE) && !defined(FEATURE_CDAC_UNWINDER) +#if !defined(DACCESS_COMPILE) #define HANDLE_PAC(pointer, sp) RtlHandlePacOnline(pointer, sp) @@ -323,7 +323,7 @@ Return Value: return; } -#endif // !defined(DACCESS_COMPILE) && !defined(FEATURE_CDAC_UNWINDER) +#endif // !defined(DACCESS_COMPILE) // // Macros to clarify opcode parsing @@ -2406,7 +2406,6 @@ Return Value: return STATUS_UNWIND_INVALID_SEQUENCE; } - // TODO-PAC: Authenticate instead of stripping the return address. HANDLE_PAC(&ContextRecord->Lr, ContextRecord->Sp); // diff --git a/src/coreclr/vm/frames.h b/src/coreclr/vm/frames.h index eaf039d8eda4c3..e0b263fe82b04c 100644 --- a/src/coreclr/vm/frames.h +++ b/src/coreclr/vm/frames.h @@ -1304,7 +1304,7 @@ class HijackFrame : public Frame LIMITED_METHOD_DAC_CONTRACT; #if defined(TARGET_ARM64) && !defined(DACCESS_COMPILE) - if ((CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_JitPacEnabled) == 1) && (m_SpForPacSign != 0)) + if (m_SpForPacSign != 0) { return (PCODE)PacAuthPtr((void*)m_ReturnAddress, (void*)m_SpForPacSign); } From e17d8e9827a0ac4ed73ed5664ad2f947f9a79925 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Wed, 6 May 2026 15:31:17 +0100 Subject: [PATCH 32/65] Refactor GetPacSignInfo --- .../nativeaot/Runtime/StackFrameIterator.cpp | 2 - .../nativeaot/Runtime/arm64/MiscStubs.S | 1 + .../nativeaot/Runtime/arm64/MiscStubs.asm | 1 + src/coreclr/nativeaot/Runtime/thread.cpp | 1 - src/coreclr/vm/arm64/asmhelpers.S | 1 + src/coreclr/vm/arm64/asmhelpers.asm | 1 + src/coreclr/vm/excep.cpp | 96 +++++-------------- src/coreclr/vm/threadsuspend.cpp | 2 +- 8 files changed, 27 insertions(+), 78 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index a260387efa7024..c22985a331ef5d 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -1859,7 
+1859,6 @@ void StackFrameIterator::NextInternal() ASSERT(!m_pThread->IsHijacked()); #if defined(TARGET_ARM64) - // We strip the PC here as it's not being used to branch execution to. SetControlPC(PacStripPtr(dac_cast(PCODEToPINSTR(m_RegDisplay.GetIP())))); #else SetControlPC(dac_cast(PCODEToPINSTR(m_RegDisplay.GetIP()))); @@ -2199,7 +2198,6 @@ void StackFrameIterator::CalculateCurrentMethodState() } #if defined(TARGET_ARM64) - // We strip the PC here as it's not being used to branch execution to. m_ControlPC = PacStripPtr(m_ControlPC); #endif // TARGET_ARM64 diff --git a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S index 20c2324974c005..1d7afd19cd73f4 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S +++ b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S @@ -6,6 +6,7 @@ // void* PacStripPtr(void *); // This function strips the pointer of PAC info that is passed as an argument. +// We prefer to strip a pointer where it's not going to be used to branch execution to. .arch_extension pauth LEAF_ENTRY PacStripPtr, _TEXT xpaci x0 diff --git a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm index 30a32ea94d0117..320e58cf2b802a 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm @@ -7,6 +7,7 @@ ; void* PacStripPtr(void *); ; This function strips the pointer of PAC info that is passed as an argument. +; We prefer to strip a pointer where it's not going to be used to branch execution to. 
LEAF_ENTRY PacStripPtr DCD 0xDAC143E0 ; xpaci x0 instruction in binary to avoid requiring PAC-enabled assemblers ret diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index ad3d5d055fab5e..da1195acaa4e64 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -823,7 +823,6 @@ void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, Hijack ASSERT(pvRetAddr != NULL); #if defined(TARGET_ARM64) - // We strip the PC here as it's not being used to branch execution to. ASSERT(StackFrameIterator::IsValidReturnAddress(PacStripPtr(pvRetAddr))); #else ASSERT(StackFrameIterator::IsValidReturnAddress(pvRetAddr)); diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 9a1b8a23b51928..200b864140fa0b 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -155,6 +155,7 @@ NESTED_END OnHijackTripThread, _TEXT // void* PacStripPtr(void *); // This function strips the pointer of PAC info that is passed as an argument. +// We prefer to strip a pointer where it's not going to be used to branch execution to. .arch_extension pauth LEAF_ENTRY PacStripPtr, _TEXT xpaci x0 diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 2d0868976b4912..06d8cf670508a6 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -320,6 +320,7 @@ NoFloatingPointRetVal ; void* PacStripPtr(void *); ; This function strips the pointer of PAC info that is passed as an argument. +; We prefer to strip a pointer where it's not going to be used to branch execution to. 
LEAF_ENTRY PacStripPtr DCD 0xDAC143E0 ; xpaci x0 instruction in binary to avoid requiring PAC-enabled assemblers ret diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 1d6482f8f5ab3e..430a25556e244d 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6393,26 +6393,7 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re } }; - ULONG unwindOpCount = 0; - for (ULONG_PTR unwindOpPtr = UnwindCodePtr; unwindOpPtr < UnwindCodesEndPtr;) - { - BYTE curCode = *(BYTE*)unwindOpPtr; - if (curCode == 0xE4) // end - { - break; - } - - SIZE_T unwindOpSize = GetUnwindOpSize(curCode); - if ((unwindOpPtr + unwindOpSize) > UnwindCodesEndPtr) - { - return false; - } - - unwindOpCount++; - unwindOpPtr += unwindOpSize; - } - - ULONG_PTR* unwindOpStarts = (ULONG_PTR*)_alloca(unwindOpCount * sizeof(ULONG_PTR)); + ULONG_PTR* unwindOpStarts = (ULONG_PTR*)_alloca((UnwindCodesEndPtr - UnwindCodePtr) * sizeof(ULONG_PTR)); ULONG unwindOpIndex = 0; for (ULONG_PTR unwindOpPtr = UnwindCodePtr; unwindOpPtr < UnwindCodesEndPtr;) { @@ -6433,8 +6414,8 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re } SSIZE_T currentSpOffset = 0; - SSIZE_T pacSpOffset = SSIZE_T_MIN; SSIZE_T lrSlotOffset = SSIZE_T_MIN; + BOOL hasPacSignLR = false; constexpr SSIZE_T PtrSize = 8; // ARM64 prolog unwind codes are stored in reverse prolog order. 
Replay them in prolog order so @@ -6444,6 +6425,18 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re UnwindCodePtr = unwindOpStarts[--unwindOpIndex]; ULONG CurCode = *(BYTE*)UnwindCodePtr; + if (((CurCode & 0xFC) == 0xC8) || // save_regp + ((CurCode & 0xFE) == 0xD8) || // save_fregp + ((CurCode & 0xFE) == 0xDC) || // save_freg + CurCode == 0xE1 || // set_fp + CurCode == 0xE2 || // add_fp + CurCode == 0xE3 || // nop + CurCode == 0xE5 || // end_c + CurCode == 0xE6) // save_next + { + continue; + } + if ((CurCode & 0xE0) == 0x00) // alloc_s { currentSpOffset -= (CurCode & 0x1F) * 16; @@ -6476,12 +6469,8 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re continue; } - if ((CurCode & 0xFC) == 0xC8) // save_regp - { - continue; - } - - if ((CurCode & 0xFC) == 0xCC) // save_regp_x + if (((CurCode & 0xFC) == 0xCC) || // save_regp_x + ((CurCode & 0xFE) == 0xDA)) // save_fregp_x { ULONG z = *(BYTE*)(UnwindCodePtr + 1) & 0x3F; currentSpOffset -= (z + 1) * 8; @@ -6521,23 +6510,6 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re continue; } - if ((CurCode & 0xFE) == 0xD8) // save_fregp - { - continue; - } - - if ((CurCode & 0xFE) == 0xDA) // save_fregp_x - { - ULONG z = *(BYTE*)(UnwindCodePtr + 1) & 0x3F; - currentSpOffset -= (z + 1) * 8; - continue; - } - - if ((CurCode & 0xFE) == 0xDC) // save_freg - { - continue; - } - if (CurCode == 0xDE) // save_freg_x { ULONG z = *(BYTE*)(UnwindCodePtr + 1) & 0x1F; @@ -6552,46 +6524,22 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re continue; } - if (CurCode == 0xE1) // set_fp - { - continue; - } - - if (CurCode == 0xE2) // add_fp - { - continue; - } - - if (CurCode == 0xE3) // nop - { - continue; - } - - if (CurCode == 0xE5) // end_c - { - continue; - } - - if (CurCode == 0xE6) // save_next - { - continue; - } - if (CurCode == 0xFC) // pac_sign_lr { - if (pacSpOffset == SSIZE_T_MIN) + 
_ASSERTE(currentSpOffset == 0); + if (currentSpOffset != 0) { - // Snapshot the SP delta for the PACIASP in prolog. - pacSpOffset = currentSpOffset; + return false; } + hasPacSignLR = true; continue; } return false; } - if (pacSpOffset == SSIZE_T_MIN) + if (!hasPacSignLR) { return true; } @@ -6601,7 +6549,7 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re return false; } - *pSpForPacSign = (TADDR)((SSIZE_T)retAddrLocation + pacSpOffset - lrSlotOffset); + *pSpForPacSign = (TADDR)((SSIZE_T)retAddrLocation - lrSlotOffset); return true; } #endif // TARGET_ARM64 diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 54c14c261a646e..4d2be03a2e83c7 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -4482,7 +4482,7 @@ struct ExecutionState #if defined(TARGET_X86) m_FirstPass = true; #elif defined(TARGET_ARM64) - m_pSpForPacSign = nullptr; + m_pSpForPacSign = nullptr; #endif } }; From 22dccb7bfd4492dd354fdcbcfb82cb8b234e4feb Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 7 May 2026 11:20:55 +0100 Subject: [PATCH 33/65] Don't assume sp offset to be zero for PAC signing --- src/coreclr/vm/excep.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 430a25556e244d..74f3eaa5203583 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6526,12 +6526,6 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re if (CurCode == 0xFC) // pac_sign_lr { - _ASSERTE(currentSpOffset == 0); - if (currentSpOffset != 0) - { - return false; - } - hasPacSignLR = true; continue; } From 21bc75a34f506d281f18a2f3a7fcea1efa113016 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 7 May 2026 12:43:49 +0100 Subject: [PATCH 34/65] Canonicalize return addresses in StackFrameIterator for NativeAOT --- .../nativeaot/Runtime/StackFrameIterator.cpp | 38 ++++++++++--------- 
.../Runtime/unix/UnixNativeCodeManager.cpp | 8 ++++ .../Runtime/windows/CoffNativeCodeManager.cpp | 6 ++- 3 files changed, 33 insertions(+), 19 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index c22985a331ef5d..12af7932742cbf 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -68,6 +68,14 @@ EXTERN_C CODE_LOCATION RhpRethrow2; extern "C" void* PacStripPtr(void* ptr); #endif // TARGET_ARM64 +static TADDR ReturnAddressToCanonicalPC(TADDR returnAddress) +{ +#if defined(TARGET_ARM64) + return (TADDR)PacStripPtr((void*)returnAddress); +#endif // TARGET_ARM64 + return returnAddress; +} + StackFrameIterator::StackFrameIterator(Thread * pThreadToWalk, PInvokeTransitionFrame* pInitialTransitionFrame) { STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init---- [ GC ]\n"); @@ -167,7 +175,7 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PInvokeTransitionF #if !defined(FEATURE_PORTABLE_HELPERS) // @TODO: no portable version of regdisplay memset(&m_RegDisplay, 0, sizeof(m_RegDisplay)); - m_RegDisplay.SetIP((PCODE)PCODEToPINSTR((PCODE)pFrame->m_RIP)); + m_RegDisplay.SetIP(ReturnAddressToCanonicalPC(dac_cast(pFrame->m_RIP))); SetControlPC(dac_cast(m_RegDisplay.GetIP())); PTR_uintptr_t pPreservedRegsCursor = (PTR_uintptr_t)PTR_HOST_MEMBER_TADDR(PInvokeTransitionFrame, pFrame, m_PreservedRegs); @@ -410,14 +418,15 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CO // This codepath is used by the hijack stackwalk and we can get arbitrary ControlPCs from there. If this // context has a non-managed control PC, then we're done. 
- if (!m_pInstance->IsManaged(dac_cast(pCtx->GetIp()))) + TADDR controlPC = ReturnAddressToCanonicalPC(pCtx->GetIp()); + if (!m_pInstance->IsManaged(dac_cast(controlPC))) return; // // control state // m_RegDisplay.SP = pCtx->GetSp(); - m_RegDisplay.IP = PCODEToPINSTR(pCtx->GetIp()); + m_RegDisplay.IP = controlPC; SetControlPC(dac_cast(m_RegDisplay.GetIP())); #ifdef TARGET_ARM @@ -630,14 +639,15 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC // This codepath is used by the hijack stackwalk. The IP must be in managed code // or in a conservatively reported assembly thunk. - ASSERT(IsValidReturnAddress((void*)pCtx->GetIp())); + TADDR controlPC = ReturnAddressToCanonicalPC(pCtx->GetIp()); + ASSERT(IsValidReturnAddress(dac_cast(controlPC))); // // control state // - SetControlPC(dac_cast(pCtx->GetIp())); + SetControlPC(dac_cast(controlPC)); m_RegDisplay.SP = pCtx->GetSp(); - m_RegDisplay.IP = pCtx->GetIp(); + m_RegDisplay.IP = controlPC; #ifdef TARGET_UNIX #define PTR_TO_REG(ptr, reg) (&((ptr)->reg())) @@ -1220,7 +1230,7 @@ void StackFrameIterator::UnwindFuncletInvokeThunk() m_RegDisplay.pFP = SP++; - m_RegDisplay.SetIP(*SP++); + m_RegDisplay.SetIP(ReturnAddressToCanonicalPC(*SP++)); m_RegDisplay.pX19 = SP++; m_RegDisplay.pX20 = SP++; @@ -1633,7 +1643,7 @@ void StackFrameIterator::UnwindUniversalTransitionThunk() stackFrame->UnwindVolatileArgRegisters(&m_RegDisplay); PTR_uintptr_t addressOfPushedCallerIP = stackFrame->get_AddressOfPushedCallerIP(); - m_RegDisplay.SetIP(PCODEToPINSTR(*addressOfPushedCallerIP)); + m_RegDisplay.SetIP(ReturnAddressToCanonicalPC(*addressOfPushedCallerIP)); m_RegDisplay.SetSP((uintptr_t)dac_cast(stackFrame->get_CallerSP())); SetControlPC(dac_cast(m_RegDisplay.GetIP())); #if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) @@ -1764,7 +1774,7 @@ void StackFrameIterator::UnwindThrowSiteThunk() ASSERT_UNCONDITIONALLY("NYI for this arch"); #endif - m_RegDisplay.SetIP(PCODEToPINSTR(pContext->IP)); + 
m_RegDisplay.SetIP(ReturnAddressToCanonicalPC(pContext->IP)); m_RegDisplay.SetSP(pContext->GetSp()); SetControlPC(dac_cast(m_RegDisplay.GetIP())); @@ -1858,11 +1868,7 @@ void StackFrameIterator::NextInternal() // if the thread is safe to walk, it better not have a hijack in place. ASSERT(!m_pThread->IsHijacked()); -#if defined(TARGET_ARM64) - SetControlPC(PacStripPtr(dac_cast(PCODEToPINSTR(m_RegDisplay.GetIP())))); -#else - SetControlPC(dac_cast(PCODEToPINSTR(m_RegDisplay.GetIP()))); -#endif // TARGET_ARM64 + SetControlPC(dac_cast(m_RegDisplay.GetIP())); PTR_VOID collapsingTargetFrame = NULL; @@ -2197,10 +2203,6 @@ void StackFrameIterator::CalculateCurrentMethodState() return; } -#if defined(TARGET_ARM64) - m_ControlPC = PacStripPtr(m_ControlPC); -#endif // TARGET_ARM64 - // Assume that the caller is likely to be in the same module if (m_pCodeManager == NULL || !m_pCodeManager->FindMethodInfo(m_ControlPC, &m_methodInfo)) { diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index 9a547d0a45f3d6..140d89fdaa0e05 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -22,6 +22,10 @@ #include "eventtracebase.h" +#if defined(TARGET_ARM64) +extern "C" void* PacStripPtr(void* ptr); +#endif // TARGET_ARM64 + #define UBF_FUNC_KIND_MASK 0x03 #define UBF_FUNC_KIND_ROOT 0x00 #define UBF_FUNC_KIND_HANDLER 0x01 @@ -555,6 +559,10 @@ bool UnixNativeCodeManager::UnwindStackFrame(MethodInfo * pMethodInfo, return false; } +#if defined(TARGET_ARM64) + pRegisterSet->SetIP((PCODE)PacStripPtr((void*)pRegisterSet->GetIP())); +#endif // TARGET_ARM64 + return true; } diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp index d098ffec84a7b5..f6c580fe625cee 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp 
+++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -22,6 +22,10 @@ #include "eventtracebase.h" +#if defined(TARGET_ARM64) +extern "C" void* PacStripPtr(void* ptr); +#endif // TARGET_ARM64 + #ifdef TARGET_X86 // Disable contracts @@ -812,7 +816,7 @@ bool CoffNativeCodeManager::UnwindStackFrame(MethodInfo * pMethodInfo, &contextPointers); pRegisterSet->SP = context.Sp; - pRegisterSet->IP = context.Pc; + pRegisterSet->IP = (PCODE)PacStripPtr((void*)context.Pc); if (!(flags & USFF_GcUnwind)) { From 493eff775b326cc951506e0bed72fc51b7c09028 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 7 May 2026 15:22:13 +0100 Subject: [PATCH 35/65] Track SP offset for PAC instruction separately while calculating hijackinfo This covers a usecase where SP adjustments are done before PAC instruction. e.g., ``` sub sp, sp, #0x30 paciasp stp fp, lr, [sp, #0x50]! ``` --- src/coreclr/vm/excep.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 74f3eaa5203583..b33e351dbb190c 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6415,6 +6415,7 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re SSIZE_T currentSpOffset = 0; SSIZE_T lrSlotOffset = SSIZE_T_MIN; + SSIZE_T pacSpOffset = 0; BOOL hasPacSignLR = false; constexpr SSIZE_T PtrSize = 8; @@ -6526,6 +6527,7 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re if (CurCode == 0xFC) // pac_sign_lr { + pacSpOffset = currentSpOffset; hasPacSignLR = true; continue; } @@ -6543,7 +6545,7 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re return false; } - *pSpForPacSign = (TADDR)((SSIZE_T)retAddrLocation - lrSlotOffset); + *pSpForPacSign = (TADDR)((SSIZE_T)retAddrLocation - (lrSlotOffset - pacSpOffset)); return true; } #endif // TARGET_ARM64 From bcbc1c122d9ace2394e208d01183c56618eed4f9 Mon Sep 17 00:00:00 2001 From: Swapnil 
Gaikwad Date: Thu, 7 May 2026 14:04:53 +0100 Subject: [PATCH 36/65] Use B key on Windows and A key otherwise for PAC --- src/coreclr/jit/codegenarm64.cpp | 6 +++++- src/coreclr/jit/codegenarm64test.cpp | 5 +++++ src/coreclr/jit/emitarm64.cpp | 15 ++++++++++++++- src/coreclr/jit/emitfmtsarm64.h | 2 +- src/coreclr/jit/instrsarm64.h | 15 +++++++++++++++ src/coreclr/jit/unwindarm64.cpp | 4 ++-- src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S | 2 +- src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm | 4 ++-- .../Compiler/ObjectWriter/Dwarf/DwarfCfiOpcode.cs | 2 +- .../JitInterface/CorInfoImpl.RyuJit.cs | 2 +- src/coreclr/vm/arm64/asmhelpers.S | 2 +- src/coreclr/vm/arm64/asmhelpers.asm | 10 +++++----- 12 files changed, 53 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index e1b961dccc7bd3..3bff55bfb0b951 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5696,11 +5696,15 @@ void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() { // Tier0 signed LR with the Tier0 caller SP before allocating its frame. // Recreate that SP from the current Tier0 body SP so we can authenticate - // LR before the OSR prolog later re-signs it with the OSR SP via PACIASP. + // LR before the OSR prolog later re-signs it with the OSR SP. 
genInstrWithConstant(INS_add, EA_PTRSIZE, REG_IP0, REG_SPBASE, patchpointInfo->TotalFrameSize(), REG_IP0, /* inUnwindRegion */ false); GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_IP1, REG_LR, /* canSkip */ false); +#ifdef TARGET_WINDOWS + GetEmitter()->emitIns(INS_autib1716); +#else GetEmitter()->emitIns(INS_autia1716); +#endif GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_LR, REG_IP1, /* canSkip */ false); } diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 10b39a53c10e60..3ca2b4b13e1ecb 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -9143,10 +9143,15 @@ void CodeGen::genArm64EmitterUnitTestsPac() // IF_PC_0A theEmitter->emitIns(INS_autia1716); // AUTIA1716 theEmitter->emitIns(INS_autiasp); // AUTIASP + theEmitter->emitIns(INS_autib1716); // AUTIB1716 + theEmitter->emitIns(INS_autibsp); // AUTIBSP theEmitter->emitIns(INS_autiaz); // AUTIAZ theEmitter->emitIns(INS_pacia1716); // PACIA1716 theEmitter->emitIns(INS_paciasp); // PACIASP theEmitter->emitIns(INS_paciaz); // PACIAZ + theEmitter->emitIns(INS_pacib1716); // PACIB1716 + theEmitter->emitIns(INS_pacibsp); // PACIBSP + theEmitter->emitIns(INS_pacibz); // PACIBZ theEmitter->emitIns(INS_xpaclri); // XPACLRI // IF_PC_1A diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 5c882d026535d3..996b73f570db17 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1440,7 +1440,11 @@ void emitter::emitPacInProlog() { return; } +#ifdef TARGET_WINDOWS + emitIns(INS_pacibsp); +#else emitIns(INS_paciasp); +#endif m_compiler->unwindPacSignLR(); } @@ -1453,7 +1457,11 @@ void emitter::emitPacInEpilog() { return; } +#ifdef TARGET_WINDOWS + emitIns(INS_autibsp); +#else emitIns(INS_autiasp); +#endif m_compiler->unwindPacSignLR(); } @@ -3756,9 +3764,14 @@ void emitter::emitIns(instruction ins) case INS_autia1716: case INS_autiasp: case INS_autiaz: + case INS_autib1716: + 
case INS_autibsp: case INS_pacia1716: case INS_paciasp: case INS_paciaz: + case INS_pacib1716: + case INS_pacibsp: + case INS_pacibz: case INS_xpaclri: assert(fmt == IF_PC_0A); break; @@ -16229,7 +16242,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - case IF_PC_0A: // autia1716, autiasp, autiaz, pacia1716, paciasp, paciaz, xpaclri + case IF_PC_0A: // autia1716, autiasp, autib1716, autibsp, autiaz, pacia1716, paciasp, pacib1716, pacibsp, pacibz, paciaz, xpaclri case IF_PC_1A: // autiza, paciza, xpacd, xpaci case IF_PC_2A: // autia, pacia switch (ins) diff --git a/src/coreclr/jit/emitfmtsarm64.h b/src/coreclr/jit/emitfmtsarm64.h index b710230609a8df..1a042b461ee72a 100644 --- a/src/coreclr/jit/emitfmtsarm64.h +++ b/src/coreclr/jit/emitfmtsarm64.h @@ -232,7 +232,7 @@ IF_DEF(SI_0A, IS_NONE, NONE) // SI_0A ...........iiiii iiiiiiiiiii..... IF_DEF(SI_0B, IS_NONE, NONE) // SI_0B ................ ....bbbb........ imm4 - barrier // Pointer Authentication (PAC) groups -IF_DEF(PC_0A, IS_NONE, NONE) // PC_0A ................ ................ (autia1716, autiasp, autiaz, pacia1716, paciasp, paciaz, xpaclri) +IF_DEF(PC_0A, IS_NONE, NONE) // PC_0A ................ ................ (autia1716, autiasp, autib1716, autibsp, autiaz, pacia1716, paciasp, pacib1716, pacibsp, pacibz, paciaz, xpaclri) IF_DEF(PC_1A, IS_NONE, NONE) // PC_1A ................ ...........ddddd Rd (autiza, paciza, xpacd, xpaci) IF_DEF(PC_2A, IS_NONE, NONE) // PC_2A X........X...... 
......nnnnnddddd Rd Rn (autia, pacia) diff --git a/src/coreclr/jit/instrsarm64.h b/src/coreclr/jit/instrsarm64.h index 4f94424065bf05..fccdadc676364e 100644 --- a/src/coreclr/jit/instrsarm64.h +++ b/src/coreclr/jit/instrsarm64.h @@ -1593,6 +1593,12 @@ INST1(autia1716, "autia1716", 0, IF_PC_0A, 0xD503219F) INST1(autiasp, "autiasp", 0, IF_PC_0A, 0xD50323BF) // autiasp PC_0A 1101010100000011 0010001110111111 D503 23BF +INST1(autib1716, "autib1716", 0, IF_PC_0A, 0xD50321DF) + // autib1716 PC_0A 1101010100000011 0010000111011111 D503 21DF + +INST1(autibsp, "autibsp", 0, IF_PC_0A, 0xD50323FF) + // autibsp PC_0A 1101010100000011 0010001111111111 D503 23FF + INST1(autiaz, "autiaz", 0, IF_PC_0A, 0xD503239F) // autiaz PC_0A 1101010100000011 0010001110011111 D503 239F @@ -1602,6 +1608,15 @@ INST1(pacia1716, "pacia1716 ", 0, IF_PC_0A, 0xD503211F) INST1(paciasp, "paciasp", 0, IF_PC_0A, 0xD503233F) // paciasp PC_0A 1101010100000011 0010001100111111 D503 233F +INST1(pacib1716, "pacib1716 ", 0, IF_PC_0A, 0xD503215F) + // pacib1716 PC_0A 1101010100000011 0010000101011111 D503 215F + +INST1(pacibsp, "pacibsp", 0, IF_PC_0A, 0xD503237F) + // pacibsp PC_0A 1101010100000011 0010001101111111 D503 237F + +INST1(pacibz, "pacibz", 0, IF_PC_0A, 0xD503235F) + // pacibz PC_0A 1101010100000011 0010001101011111 D503 235F + INST1(paciaz, "paciaz", 0, IF_PC_0A, 0xD503231F) // paciaz PC_0A 1101010100000011 0010001100011111 D503 231F diff --git a/src/coreclr/jit/unwindarm64.cpp b/src/coreclr/jit/unwindarm64.cpp index f5696b1aa841b4..620438ec644e5a 100644 --- a/src/coreclr/jit/unwindarm64.cpp +++ b/src/coreclr/jit/unwindarm64.cpp @@ -658,7 +658,7 @@ void Compiler::unwindPacSignLR() } #endif // FEATURE_CFI_SUPPORT - // pac_sign_lr: 11111100: sign the return address in lr with paciasp + // pac_sign_lr: 11111100: sign the return address in lr with the platform PAC key funCurrentFunc()->uwi.AddCode(0xFC); } @@ -1110,7 +1110,7 @@ void DumpUnwindInfo(Compiler* comp, } else if (b1 == 0xFC) { - // 
pac_sign_lr: 11111100 : sign the return address in lr with paciasp. + // pac_sign_lr: 11111100 : sign the return address in lr with the platform PAC key. printf(" %02X pac_sign_lr\n", b1); } diff --git a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S index 1d7afd19cd73f4..b68c5589e3431f 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S +++ b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S @@ -14,7 +14,7 @@ LEAF_END PacStripPtr, _TEXT // void* PacSignPtr(void *, void *); -// This function sign the input pointer using zero as salt. +// This function signs the input pointer using x1 as salt. // Thus we need to move input in lr, sign it and then copy it back to the result register. .arch_extension pauth LEAF_ENTRY PacSignPtr, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm index 320e58cf2b802a..1695a494fc8103 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm @@ -14,12 +14,12 @@ LEAF_END PacStripPtr ; void* PacSignPtr(void *, void *); -; This function sign the input pointer using zero as salt. +; This function signs the input pointer using x1 as salt. ; Thus we need to move input in lr, sign it and then copy it back to the result register. 
LEAF_ENTRY PacSignPtr mov x17, x0 mov x16, x1 - DCD 0xD503211F ; pacia1716 instruction in binary to avoid error while compiling with non-PAC enabled compilers + DCD 0xD503215F ; pacib1716 instruction in binary to avoid error while compiling with non-PAC enabled compilers mov x0, x17 ret LEAF_END PacSignPtr diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCfiOpcode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCfiOpcode.cs index e79f859da5efce..24a51314396555 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCfiOpcode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCfiOpcode.cs @@ -12,6 +12,6 @@ internal enum CFI_OPCODE CFI_DEF_CFA_REGISTER, // New register is used to compute CFA CFI_REL_OFFSET, // Register is saved at offset from the current CFA CFI_DEF_CFA, // Take address from register and add offset to it. - CFI_NEGATE_RA_STATE, // Sign the return address in lr with paciasp + CFI_NEGATE_RA_STATE, // Sign the return address in lr with the platform PAC key } } diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs index 35dc6604a9f9e5..3d2f054676185e 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs @@ -102,7 +102,7 @@ private enum CFI_OPCODE CFI_DEF_CFA_REGISTER, // New register is used to compute CFA CFI_REL_OFFSET, // Register is saved at offset from the current CFA CFI_DEF_CFA, // Take address from register and add offset to it. - CFI_NEGATE_RA_STATE, // Sign the return address in lr with paciasp + CFI_NEGATE_RA_STATE, // Sign the return address in lr with the platform PAC key } // Get the CFI data in the same shape as clang/LLVM generated one. 
This improves the compatibility with libunwind and other unwind solutions diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 200b864140fa0b..c0bf67cae0e806 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -163,7 +163,7 @@ NESTED_END OnHijackTripThread, _TEXT LEAF_END PacStripPtr, _TEXT // void* PacSignPtr(void *, void *); -// This function sign the input pointer using SP as salt. +// This function signs the input pointer using x1 as salt. // To avoid failing on non-PAC enabled machines, we use pacia1716 which signs lr explicitly. // Thus we need to move input in lr, sign it and then copy it back to the result register. .arch_extension pauth diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 06d8cf670508a6..8d3a816caa9fe1 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -327,25 +327,25 @@ NoFloatingPointRetVal LEAF_END PacStripPtr ; void* PacSignPtr(void *, void *); -; This function sign the input pointer using zero as salt. -; To avoid failing on non-PAC enabled machines, we use pacia1716 which signs lr explicitly. +; This function signs the input pointer using x1 as salt. +; To avoid failing on non-PAC enabled machines, we use pacib1716 which signs lr explicitly. ; Thus we need to move input in lr, sign it and then copy it back to the result register. LEAF_ENTRY PacSignPtr mov x17, x0 mov x16, x1 - DCD 0xD503211F ; pacia1716 instruction in binary to avoid error while compiling with non-PAC enabled compilers + DCD 0xD503215F ; pacib1716 instruction in binary to avoid error while compiling with non-PAC enabled compilers mov x0, x17 ret LEAF_END PacSignPtr ; void* PacAuthPtr(void *, void *); ; This function authenticates the input signed-pointer using x1 as salt. -; To avoid failing on non-PAC enabled machines, we use pacia1716 which authenticates lr explicitly. 
+; To avoid failing on non-PAC enabled machines, we use autib1716 which authenticates lr explicitly. ; Thus we need to move input in lr, authenticate it and then copy it back to the result register. LEAF_ENTRY PacAuthPtr mov x17, x0 mov x16, x1 - DCD 0xD503219F ; autia1716 instruction in binary to avoid error while compiling with non-PAC enabled compilers + DCD 0xD50321DF ; autib1716 instruction in binary to avoid error while compiling with non-PAC enabled compilers mov x0, x17 ret LEAF_END PacAuthPtr From 24d44ea6a4ecbf2aba3f455c95bf377e30a657af Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 7 May 2026 19:20:35 +0100 Subject: [PATCH 37/65] Fix formatting --- src/coreclr/jit/emitarm64.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 996b73f570db17..dfcf2b5e6bdfb6 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -16242,7 +16242,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - case IF_PC_0A: // autia1716, autiasp, autib1716, autibsp, autiaz, pacia1716, paciasp, pacib1716, pacibsp, pacibz, paciaz, xpaclri + case IF_PC_0A: // autia1716, autiasp, autib1716, autibsp, autiaz, pacia1716, paciasp, pacib1716, pacibsp, + // pacibz, paciaz, xpaclri case IF_PC_1A: // autiza, paciza, xpacd, xpaci case IF_PC_2A: // autia, pacia switch (ins) From 7df0c1f14a5d53e257316ef1c53b5a74397bfc63 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Fri, 8 May 2026 11:00:49 +0100 Subject: [PATCH 38/65] Address review comments --- src/coreclr/jit/codegenarm64.cpp | 6 +----- src/coreclr/jit/emitarm64.cpp | 12 ++---------- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 3bff55bfb0b951..6fdcb7496dea44 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5700,11 +5700,7 @@ void 
CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() genInstrWithConstant(INS_add, EA_PTRSIZE, REG_IP0, REG_SPBASE, patchpointInfo->TotalFrameSize(), REG_IP0, /* inUnwindRegion */ false); GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_IP1, REG_LR, /* canSkip */ false); -#ifdef TARGET_WINDOWS - GetEmitter()->emitIns(INS_autib1716); -#else - GetEmitter()->emitIns(INS_autia1716); -#endif + GetEmitter()->emitIns(TargetOS::IsWindows ? INS_autib1716 : INS_autia1716); GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_LR, REG_IP1, /* canSkip */ false); } diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index dfcf2b5e6bdfb6..630f15e7040089 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1440,11 +1440,7 @@ void emitter::emitPacInProlog() { return; } -#ifdef TARGET_WINDOWS - emitIns(INS_pacibsp); -#else - emitIns(INS_paciasp); -#endif + emitIns(TargetOS::IsWindows ? INS_pacibsp : INS_paciasp); m_compiler->unwindPacSignLR(); } @@ -1457,11 +1453,7 @@ void emitter::emitPacInEpilog() { return; } -#ifdef TARGET_WINDOWS - emitIns(INS_autibsp); -#else - emitIns(INS_autiasp); -#endif + emitIns(TargetOS::IsWindows ? 
INS_autibsp : INS_autiasp); m_compiler->unwindPacSignLR(); } From e9be308421f5bcb8c2175f75dc7fe5a95c2d1d12 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Fri, 8 May 2026 16:59:46 +0100 Subject: [PATCH 39/65] Track PAC CFA offset while unwinding for NativeAOT --- .../nativeaot/Runtime/unix/UnixNativeCodeManager.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index 140d89fdaa0e05..dca47eb39c31c7 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -114,10 +114,11 @@ struct PacFrameInfo bool hasPac; int cfaOffset; int lrOffset; + int pacCfaOffset; }; static bool TryGetPacFrameInfo(UnixNativeMethodInfo *pNativeMethodInfo, - PacFrameInfo *pPacFrameInfo) + PacFrameInfo *pPacFrameInfo) { const uint8_t* p = (const uint8_t*)pNativeMethodInfo->unwind_info; uint32_t fdeLength = *dac_cast((uint8_t*)p); @@ -141,6 +142,7 @@ static bool TryGetPacFrameInfo(UnixNativeMethodInfo *pNativeMethodInfo, int cfaOffset = 0; int lrOffset = INT_MIN; + int pacCfaOffset = 0; bool hasPac = false; while (p < end) @@ -149,6 +151,7 @@ static bool TryGetPacFrameInfo(UnixNativeMethodInfo *pNativeMethodInfo, if (op == DW_CFA_AARCH64_negate_ra_state) { + pacCfaOffset = cfaOffset; hasPac = true; continue; } @@ -238,6 +241,7 @@ static bool TryGetPacFrameInfo(UnixNativeMethodInfo *pNativeMethodInfo, pPacFrameInfo->hasPac = hasPac; pPacFrameInfo->cfaOffset = cfaOffset; pPacFrameInfo->lrOffset = lrOffset; + pPacFrameInfo->pacCfaOffset = pacCfaOffset; return true; } @@ -251,10 +255,11 @@ static bool TryGetSpForPacSigning(const PacFrameInfo& pacFrameInfo, return true; } - if (ppvRetAddrLocation == NULL || pacFrameInfo.lrOffset == INT_MIN || pacFrameInfo.cfaOffset < pacFrameInfo.lrOffset) + if (ppvRetAddrLocation == NULL || pacFrameInfo.lrOffset == INT_MIN) 
return false; - *pSpForPacSign = dac_cast(ppvRetAddrLocation) + (pacFrameInfo.cfaOffset - pacFrameInfo.lrOffset); + *pSpForPacSign = dac_cast(ppvRetAddrLocation) + + (pacFrameInfo.cfaOffset - pacFrameInfo.lrOffset - pacFrameInfo.pacCfaOffset); return true; } #endif // TARGET_ARM64 From ddccab00f15653d0c24c831bc6fafa813834f76c Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Fri, 8 May 2026 17:02:16 +0100 Subject: [PATCH 40/65] Match finding SP for PAC signing on Windows NativeAOT with the JIT --- .../Runtime/windows/CoffNativeCodeManager.cpp | 232 ++++++++++++++++-- src/coreclr/vm/excep.cpp | 16 +- 2 files changed, 218 insertions(+), 30 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp index f6c580fe625cee..414e7b592de545 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -844,31 +844,66 @@ bool CoffNativeCodeManager::IsUnwindable(PTR_VOID pvAddress) } #if defined(TARGET_ARM64) -static bool HasPacInUnwindInfo(PTR_VOID pUnwindDataBlob, size_t unwindDataBlobSize) +static bool TryGetSpForPacSigning(PTR_VOID pUnwindDataBlob, + size_t unwindDataBlobSize, + PTR_PTR_VOID ppvRetAddrLocation, + TADDR *pSpForPacSign) { - PTR_uint8_t UnwindCodePtr = dac_cast(pUnwindDataBlob); - PTR_uint8_t UnwindCodesEndPtr = dac_cast(pUnwindDataBlob) + unwindDataBlobSize; + ASSERT(pSpForPacSign != nullptr); - while (UnwindCodePtr < UnwindCodesEndPtr) + *pSpForPacSign = 0; + + //TODO-PAC: Bail out in prolog and epilog for consistency with GetPacSignInfo() in JIT + + ASSERT(unwindDataBlobSize >= sizeof(DWORD)); + + PTR_uint8_t unwindDataPtr = dac_cast(pUnwindDataBlob); + PTR_uint8_t unwindDataEndPtr = unwindDataPtr + unwindDataBlobSize; + + // For unwind info layout details refer 
https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling?view=msvc-170#arm64-exception-handling-information + // Read the header word. + DWORD HeaderWord = *dac_cast((uint8_t*)unwindDataPtr); + unwindDataPtr += sizeof(DWORD); + + ASSERT(((HeaderWord >> 18) & 3) == 0); // Version 0 is the only supported version. + + ULONG UnwindWords = (HeaderWord >> 27) & 31; + ULONG EpilogScopeCount = (HeaderWord >> 22) & 31; + if (EpilogScopeCount == 0 && UnwindWords == 0) { - uint8_t CurCode = *UnwindCodePtr; - if (CurCode == 0xe4) // The last unwind code + if ((unwindDataPtr + sizeof(DWORD)) > unwindDataEndPtr) { - break; + return false; } - if (CurCode == 0xFC) // Unwind code for PAC (pac_sign_lr) - { - return true; - } + DWORD extendedCounts = *dac_cast((uint8_t*)unwindDataPtr); + unwindDataPtr += sizeof(DWORD); + UnwindWords = (extendedCounts >> 16) & 0xFF; + EpilogScopeCount = extendedCounts & 0xFFFF; + } + + if ((HeaderWord & (1 << 21)) != 0) + { + EpilogScopeCount = 0; + } + + if ((unwindDataPtr + (EpilogScopeCount * sizeof(DWORD)) + (UnwindWords * sizeof(DWORD))) > unwindDataEndPtr) + { + return false; + } - if (CurCode < 0xC0) + PTR_uint8_t UnwindCodePtr = unwindDataPtr + (EpilogScopeCount * sizeof(DWORD)); + PTR_uint8_t UnwindCodesEndPtr = UnwindCodePtr + (UnwindWords * sizeof(DWORD)); + + auto GetUnwindOpSize = [](BYTE unwindCode) -> SIZE_T + { + if (unwindCode < 0xC0) { - UnwindCodePtr += 1; + return 1; } - else if (CurCode < 0xE0) + else if (unwindCode < 0xE0) { - UnwindCodePtr += 2; + return 2; } else { @@ -877,11 +912,164 @@ static bool HasPacInUnwindInfo(PTR_VOID pUnwindDataBlob, size_t unwindDataBlobSi 4,1,2,1,1,1,1,3, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 2,3,4,5,1,1,1,1 }; - UnwindCodePtr += UnwindCodeSizeTable[CurCode - 0xE0]; + return UnwindCodeSizeTable[unwindCode - 0xE0]; + } + }; + + TADDR* unwindOpStarts = (TADDR*)_alloca((UnwindCodesEndPtr - UnwindCodePtr) * sizeof(TADDR)); + ULONG unwindOpIndex = 0; + for (PTR_uint8_t unwindOpPtr = 
UnwindCodePtr; unwindOpPtr < UnwindCodesEndPtr;) + { + BYTE curCode = *unwindOpPtr; + if (curCode == 0xE4) // end + { + break; + } + + SIZE_T unwindOpSize = GetUnwindOpSize(curCode); + if ((unwindOpPtr + unwindOpSize) > UnwindCodesEndPtr) + { + return false; } + + unwindOpStarts[unwindOpIndex++] = dac_cast(unwindOpPtr); + unwindOpPtr += unwindOpSize; } - return false; + SSIZE_T currentSpOffset = 0; + SSIZE_T lrSlotOffset = SSIZE_T_MIN; + SSIZE_T pacSpOffset = 0; + bool hasPacSignLR = false; + constexpr SSIZE_T PtrSize = 8; + + // ARM64 prolog unwind codes are stored in reverse prolog order. Replay them in prolog order so + // PACIASP/PACIBSP captures the SP that was live when LR was originally signed. + while (unwindOpIndex != 0) + { + UnwindCodePtr = dac_cast(unwindOpStarts[--unwindOpIndex]); + BYTE CurCode = *UnwindCodePtr; + + if (((CurCode & 0xFC) == 0xC8) || // save_regp + ((CurCode & 0xFE) == 0xD8) || // save_fregp + ((CurCode & 0xFE) == 0xDC) || // save_freg + CurCode == 0xE1 || // set_fp + CurCode == 0xE2 || // add_fp + CurCode == 0xE3 || // nop + CurCode == 0xE5 || // end_c + CurCode == 0xE6) // save_next + { + continue; + } + + if ((CurCode & 0xE0) == 0x00) // alloc_s + { + currentSpOffset -= (CurCode & 0x1F) * 16; + continue; + } + + if ((CurCode & 0xE0) == 0x20) // save_r19r20_x + { + currentSpOffset -= (CurCode & 0x1F) * 8; + continue; + } + + if ((CurCode & 0xC0) == 0x40) // save_fplr + { + lrSlotOffset = currentSpOffset + ((CurCode & 0x3F) * 8) + PtrSize; + continue; + } + + if ((CurCode & 0xC0) == 0x80) // save_fplr_x + { + currentSpOffset -= ((CurCode & 0x3F) + 1) * 8; + lrSlotOffset = currentSpOffset + PtrSize; + continue; + } + + if ((CurCode & 0xF8) == 0xC0) // alloc_m + { + ULONG x = ((CurCode & 0x7) << 8) | *(UnwindCodePtr + 1); + currentSpOffset -= x * 16; + continue; + } + + if (((CurCode & 0xFC) == 0xCC) || // save_regp_x + ((CurCode & 0xFE) == 0xDA)) // save_fregp_x + { + ULONG z = *(UnwindCodePtr + 1) & 0x3F; + currentSpOffset -= (z + 1) 
* 8; + continue; + } + + if ((CurCode & 0xFC) == 0xD0) // save_reg + { + BYTE nextCode = *(UnwindCodePtr + 1); + ULONG x = ((CurCode & 0x3) << 2) | (nextCode >> 6); + ULONG z = nextCode & 0x3F; + if (x == 11) // R30 / LR is the 12th GP register in the save_reg encodings + { + lrSlotOffset = currentSpOffset + z * 8; + } + + continue; + } + + if ((CurCode & 0xFE) == 0xD4) // save_reg_x + { + BYTE nextCode = *(UnwindCodePtr + 1); + ULONG x = ((CurCode & 0x1) << 3) | (nextCode >> 5); + currentSpOffset -= ((nextCode & 0x1F) + 1) * 8; + if (x == 11) // R30 / LR is the 12th GP register in the save_reg encodings + { + lrSlotOffset = currentSpOffset; + } + + continue; + } + + if ((CurCode & 0xFE) == 0xD6) // save_lrpair + { + ULONG z = *(UnwindCodePtr + 1) & 0x3F; + lrSlotOffset = currentSpOffset + z * 8 + PtrSize; + continue; + } + + if (CurCode == 0xDE) // save_freg_x + { + ULONG z = *(UnwindCodePtr + 1) & 0x1F; + currentSpOffset -= (z + 1) * 8; + continue; + } + + if (CurCode == 0xE0) // alloc_l + { + ULONG x = (*(UnwindCodePtr + 1) << 16) | (*(UnwindCodePtr + 2) << 8) | *(UnwindCodePtr + 3); + currentSpOffset -= x * 16; + continue; + } + + if (CurCode == 0xFC) // pac_sign_lr + { + pacSpOffset = currentSpOffset; + hasPacSignLR = true; + continue; + } + + return false; + } + + if (!hasPacSignLR) + { + return true; + } + + if (lrSlotOffset == SSIZE_T_MIN) + { + return false; + } + + *pSpForPacSign = (TADDR)((SSIZE_T)dac_cast(ppvRetAddrLocation) - (lrSlotOffset - pacSpOffset)); + return true; } #endif //TARGET_ARM64 @@ -961,11 +1149,6 @@ bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn return false; } - if (HasPacInUnwindInfo(pUnwindDataBlob, unwindDataBlobSize)) - { - *pSpForArm64PacSign = pRegisterSet->GetSP(); - } - context.Sp = pRegisterSet->GetSP(); context.Fp = pRegisterSet->GetFP(); context.Pc = pRegisterSet->GetIP(); @@ -997,6 +1180,11 @@ bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn } *ppvRetAddrLocation 
= (PTR_PTR_VOID)contextPointers.Lr; + if (!TryGetSpForPacSigning(pUnwindDataBlob, unwindDataBlobSize, *ppvRetAddrLocation, pSpForArm64PacSign)) + { + return false; + } + return true; #else *pSpForArm64PacSign = 0; diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index b33e351dbb190c..80d6419535d476 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6350,7 +6350,7 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re // For unwind info layout details refer https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling?view=msvc-170#arm64-exception-handling-information // Read the header word. DWORD HeaderWord = *(DWORD*)UnwindDataPtr; - UnwindDataPtr += 4; + UnwindDataPtr += sizeof(DWORD); _ASSERTE(((HeaderWord >> 18) & 3) == 0); // Version 0 is the only supported version. @@ -6358,10 +6358,10 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re ULONG EpilogScopeCount = (HeaderWord >> 22) & 31; if (EpilogScopeCount == 0 && UnwindWords == 0) { - EpilogScopeCount = *(DWORD*)UnwindDataPtr; - UnwindDataPtr += 4; - UnwindWords = (EpilogScopeCount >> 16) & 0xFF; - EpilogScopeCount &= 0xFFFF; + DWORD extendedCounts = *(DWORD*)UnwindDataPtr; + UnwindDataPtr += sizeof(DWORD); + UnwindWords = (extendedCounts >> 16) & 0xFF; + EpilogScopeCount = extendedCounts & 0xFFFF; } if ((HeaderWord & (1 << 21)) != 0) @@ -6369,8 +6369,8 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re EpilogScopeCount = 0; } - ULONG_PTR UnwindCodePtr = UnwindDataPtr + 4 * EpilogScopeCount; - ULONG_PTR UnwindCodesEndPtr = UnwindCodePtr + 4 * UnwindWords; + ULONG_PTR UnwindCodePtr = UnwindDataPtr + sizeof(DWORD) * EpilogScopeCount; + ULONG_PTR UnwindCodesEndPtr = UnwindCodePtr + sizeof(DWORD) * UnwindWords; auto GetUnwindOpSize = [](BYTE unwindCode) -> SIZE_T { @@ -6420,7 +6420,7 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re 
constexpr SSIZE_T PtrSize = 8; // ARM64 prolog unwind codes are stored in reverse prolog order. Replay them in prolog order so - // PACIASP captures the SP that was live when LR was originally signed. + // PACIASP/PACIBSP captures the SP that was live when LR was originally signed. while (unwindOpIndex != 0) { UnwindCodePtr = unwindOpStarts[--unwindOpIndex]; From ff1f08e56a11136a276bd97105e9959f8090e1ad Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Mon, 11 May 2026 10:57:26 +0100 Subject: [PATCH 41/65] Fix build errors --- .../nativeaot/Runtime/windows/CoffNativeCodeManager.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp index 414e7b592de545..8bd0b67c5b2299 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -937,7 +937,7 @@ static bool TryGetSpForPacSigning(PTR_VOID pUnwindDataBlob, } SSIZE_T currentSpOffset = 0; - SSIZE_T lrSlotOffset = SSIZE_T_MIN; + SSIZE_T lrSlotOffset = -1; SSIZE_T pacSpOffset = 0; bool hasPacSignLR = false; constexpr SSIZE_T PtrSize = 8; @@ -1063,7 +1063,7 @@ static bool TryGetSpForPacSigning(PTR_VOID pUnwindDataBlob, return true; } - if (lrSlotOffset == SSIZE_T_MIN) + if (lrSlotOffset == -1) { return false; } From af97173116655f02cd97df304a54e4335158c1c6 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Wed, 13 May 2026 16:16:09 +0100 Subject: [PATCH 42/65] Remove redundant changes --- src/coreclr/inc/gcinfodecoder.h | 1 + src/coreclr/vm/arm64/asmhelpers.S | 1 + 2 files changed, 2 insertions(+) diff --git a/src/coreclr/inc/gcinfodecoder.h b/src/coreclr/inc/gcinfodecoder.h index 69a45d6d349710..0836dfd0c54b68 100644 --- a/src/coreclr/inc/gcinfodecoder.h +++ b/src/coreclr/inc/gcinfodecoder.h @@ -798,3 +798,4 @@ typedef TGcInfoDecoder InterpreterGcInfoDecoder; #endif // 
_GC_INFO_DECODER_ + diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index c0bf67cae0e806..30b624a1f7309e 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -3548,3 +3548,4 @@ NESTED_ENTRY IL_Rethrow, _TEXT, NoHandler // Should never return brk #0 NESTED_END IL_Rethrow, _TEXT + From c40c08cd099653ba227cd5bcb3397969b5198691 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 14 May 2026 11:19:39 +0100 Subject: [PATCH 43/65] Make JitPacEnabled config variable Arm64 only --- src/coreclr/jit/codegenarmarch.cpp | 2 ++ src/coreclr/jit/jitconfigvalues.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 20d41c662be1e2..8f518b11c3cc91 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -4484,10 +4484,12 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe } #endif // DEBUG +#if defined(TARGET_ARM64) if (JitConfig.JitPacEnabled() != 0) { GetEmitter()->emitPacInProlog(); } +#endif // TARGET_ARM64 // The frameType number is arbitrary, is defined below, and corresponds to one of the frame styles we // generate based on various sizes. 
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index ae1aaaa6a687e1..290515d40a04a3 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -126,7 +126,9 @@ CONFIG_STRING(JitInlineMethodsWithEHRange, "JitInlineMethodsWithEHRange") CONFIG_INTEGER(JitLongAddress, "JitLongAddress", 0) // Force using the large pseudo instruction form for long address CONFIG_INTEGER(JitMaxUncheckedOffset, "JitMaxUncheckedOffset", 8) +#if defined(TARGET_ARM64) RELEASE_CONFIG_INTEGER(JitPacEnabled, "JitPacEnabled", 1) +#endif // Enable devirtualization for generic virtual methods RELEASE_CONFIG_INTEGER(JitEnableGenericVirtualDevirtualization, "JitEnableGenericVirtualDevirtualization", 1) From a6b66415ae352131841f1386b28054e9a41b403f Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Fri, 15 May 2026 14:00:28 +0100 Subject: [PATCH 44/65] Restore clearing THUMB_CODE on arm32 --- src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index 12af7932742cbf..6a63f7804a1289 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -71,9 +71,9 @@ extern "C" void* PacStripPtr(void* ptr); static TADDR ReturnAddressToCanonicalPC(TADDR returnAddress) { #if defined(TARGET_ARM64) - return (TADDR)PacStripPtr((void*)returnAddress); + returnAddress = (TADDR)PacStripPtr((void*)returnAddress); #endif // TARGET_ARM64 - return returnAddress; + return PCODEToPINSTR(dac_cast(returnAddress)); } StackFrameIterator::StackFrameIterator(Thread * pThreadToWalk, PInvokeTransitionFrame* pInitialTransitionFrame) From 401b49621f5f8a6e44d509c8d54ff9431f68312e Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Tue, 19 May 2026 16:44:22 +0100 Subject: [PATCH 45/65] Use PAL unwinder for UNIX to retrieve signing 
SP for PAC --- .../Runtime/ExceptionServices/AsmOffsets.cs | 8 +-- src/coreclr/inc/clrnt.h | 18 ++++++ src/coreclr/inc/regdisp.h | 10 ++++ src/coreclr/jit/codegenarm64.cpp | 1 + src/coreclr/unwinder/arm64/unwinder.cpp | 58 +++++++++++++++++++ src/coreclr/vm/eetwain.cpp | 11 +++- src/coreclr/vm/excep.cpp | 22 ++----- src/coreclr/vm/excep.h | 4 +- src/coreclr/vm/stackwalk.cpp | 31 +++++++++- src/coreclr/vm/threads.h | 2 +- src/coreclr/vm/threadsuspend.cpp | 37 +++++++----- 11 files changed, 161 insertions(+), 41 deletions(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index 23474fdb1e629d..47857e9009a814 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -26,11 +26,11 @@ class AsmOffsets #endif // TARGET_UNIX #elif TARGET_ARM64 #if TARGET_UNIX - public const int SIZEOF__REGDISPLAY = 0x9e0; + public const int SIZEOF__REGDISPLAY = 0x9f0; public const int OFFSETOF__REGDISPLAY__SP = 0x938; public const int OFFSETOF__REGDISPLAY__ControlPC = 0x940; #else // TARGET_UNIX - public const int SIZEOF__REGDISPLAY = 0x940; + public const int SIZEOF__REGDISPLAY = 0x950; public const int OFFSETOF__REGDISPLAY__SP = 0x898; public const int OFFSETOF__REGDISPLAY__ControlPC = 0x8a0; #endif // TARGET_UNIX @@ -87,11 +87,11 @@ class AsmOffsets #endif // TARGET_UNIX #elif TARGET_ARM64 #if TARGET_UNIX - public const int SIZEOF__REGDISPLAY = 0x9d0; + public const int SIZEOF__REGDISPLAY = 0x9e0; public const int OFFSETOF__REGDISPLAY__SP = 0x930; public const int OFFSETOF__REGDISPLAY__ControlPC = 0x938; #else // TARGET_UNIX - public const int SIZEOF__REGDISPLAY = 0x930; + public const int SIZEOF__REGDISPLAY = 0x940; public const int OFFSETOF__REGDISPLAY__SP = 0x890; public const int OFFSETOF__REGDISPLAY__ControlPC 
= 0x898; #endif // TARGET_UNIX diff --git a/src/coreclr/inc/clrnt.h b/src/coreclr/inc/clrnt.h index a8f093dd188bc4..58252127ec1c16 100644 --- a/src/coreclr/inc/clrnt.h +++ b/src/coreclr/inc/clrnt.h @@ -365,6 +365,24 @@ RtlVirtualUnwind( IN OUT PKNONVOLATILE_CONTEXT_POINTERS ContextPointers OPTIONAL ); +#if defined(TARGET_UNIX) +EXTERN_C +NTSYSAPI +PEXCEPTION_ROUTINE +NTAPI +RtlVirtualUnwindWithSpForPacSign( + IN ULONG HandlerType, + IN ULONG64 ImageBase, + IN ULONG64 ControlPc, + IN PRUNTIME_FUNCTION FunctionEntry, + IN OUT PCONTEXT ContextRecord, + OUT PVOID *HandlerData, + OUT PULONG64 EstablisherFrame, + IN OUT PKNONVOLATILE_CONTEXT_POINTERS ContextPointers OPTIONAL, + OUT PULONG64 SpForPacSign OPTIONAL + ); +#endif // TARGET_UNIX + // Mirror the XSTATE_ARM64_SVE flags from winnt.h #ifndef XSTATE_ARM64_SVE diff --git a/src/coreclr/inc/regdisp.h b/src/coreclr/inc/regdisp.h index afd24a1d566df7..487537ff61c2c3 100644 --- a/src/coreclr/inc/regdisp.h +++ b/src/coreclr/inc/regdisp.h @@ -44,6 +44,11 @@ struct REGDISPLAY_BASE { TADDR SP; TADDR ControlPC; // LOONGARCH: use RA for PC +#if defined(TARGET_ARM64) + TADDR CurrentContextSpForPacSign; + TADDR CallerContextSpForPacSign; +#endif // TARGET_ARM64 + #if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) TADDR SSP; #endif @@ -453,6 +458,11 @@ inline void FillRegDisplay(const PREGDISPLAY pRD, PT_CONTEXT pctx, PT_CONTEXT pC pRD->IsCallerSPValid = TRUE; // Don't add usage of this field. This is only temporary. 
} +#if defined(TARGET_ARM64) + pRD->CurrentContextSpForPacSign = 0; + pRD->CallerContextSpForPacSign = 0; +#endif // TARGET_ARM64 + #ifdef DEBUG_REGDISPLAY pRD->_pThread = NULL; #endif // DEBUG_REGDISPLAY diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index f6fa57b960e980..b06de35e3a3d37 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -498,6 +498,7 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, assert(reg1 != REG_LR); GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_PRE_INDEX); m_compiler->unwindSaveRegPairPreindexed(reg1, reg2, spDelta); + needToSaveRegs = false; } else // (spOffset != 0) || (spDelta < -512) diff --git a/src/coreclr/unwinder/arm64/unwinder.cpp b/src/coreclr/unwinder/arm64/unwinder.cpp index f3c398c123af66..e4d555eb08065a 100644 --- a/src/coreclr/unwinder/arm64/unwinder.cpp +++ b/src/coreclr/unwinder/arm64/unwinder.cpp @@ -193,6 +193,7 @@ typedef struct _ARM64_UNWIND_PARAMS PULONG_PTR LowLimit; PULONG_PTR HighLimit; PKNONVOLATILE_CONTEXT_POINTERS ContextPointers; + PULONG_PTR SpForPacSign; } ARM64_UNWIND_PARAMS, *PARM64_UNWIND_PARAMS; #define UNWIND_PARAMS_SET_TRAP_FRAME(Params, Address, Size) @@ -2406,6 +2407,10 @@ Return Value: return STATUS_UNWIND_INVALID_SEQUENCE; } + if (UnwindParams->SpForPacSign != NULL) { + *UnwindParams->SpForPacSign = ContextRecord->Sp; + } + HANDLE_PAC(&ContextRecord->Lr, ContextRecord->Sp); // @@ -2564,6 +2569,7 @@ RtlpxVirtualUnwind ( _In_opt_ PULONG_PTR LowLimit, _In_opt_ PULONG_PTR HighLimit, _Outptr_opt_result_maybenull_ PEXCEPTION_ROUTINE *HandlerRoutine, + _Out_opt_ PULONG_PTR SpForPacSign, _In_ ULONG UnwindFlags ) @@ -2620,6 +2626,9 @@ Routine Description: language specific exception handler is returned. Otherwise, NULL is returned. + SpForPacSign - Supplies an optional pointer to retrieve the SP used to + sign the return address when pointer authentication (PAC) is enabled. 
+ UnwindFlags - Supplies additional flags for the unwind operation. Return Value: @@ -2639,6 +2648,10 @@ Return Value: UNWINDER_ASSERT((UnwindFlags & ~RTL_VIRTUAL_UNWIND_VALID_FLAGS_ARM64) == 0); + if (SpForPacSign != NULL) { + *SpForPacSign = 0; + } + if (FunctionEntry == NULL) { // @@ -2699,6 +2712,8 @@ Return Value: UnwindParams.LowLimit = LowLimit; UnwindParams.HighLimit = HighLimit; UnwindParams.ContextPointers = ContextPointers; + UnwindParams.SpForPacSign = SpForPacSign; + UnwindType = (FunctionEntry->UnwindData & 3); // @@ -2802,6 +2817,7 @@ BOOL OOPStackUnwinderArm64::Unwind(T_CONTEXT * pContext) NULL, NULL, &DummyHandlerRoutine, + NULL, 0); // @@ -2869,6 +2885,7 @@ RtlVirtualUnwind( NULL, NULL, &HandlerRoutine, + NULL, 0); // @@ -2883,4 +2900,45 @@ RtlVirtualUnwind( return HandlerRoutine; } + +PEXCEPTION_ROUTINE +RtlVirtualUnwindWithSpForPacSign( + IN ULONG HandlerType, + IN ULONG64 ImageBase, + IN ULONG64 ControlPc, + IN PRUNTIME_FUNCTION FunctionEntry, + IN OUT PCONTEXT ContextRecord, + OUT PVOID *HandlerData, + OUT PULONG64 EstablisherFrame, + IN OUT PKNONVOLATILE_CONTEXT_POINTERS ContextPointers OPTIONAL, + OUT PULONG64 SpForPacSign OPTIONAL + ) +{ + PEXCEPTION_ROUTINE HandlerRoutine; + NTSTATUS Status; + + HandlerRoutine = NULL; + Status = RtlpxVirtualUnwind(HandlerType, + ImageBase, + ControlPc, + (PIMAGE_ARM64_RUNTIME_FUNCTION_ENTRY)FunctionEntry, + ContextRecord, + HandlerData, + EstablisherFrame, + ContextPointers, + NULL, + NULL, + &HandlerRoutine, + (PULONG_PTR)SpForPacSign, + 0); + + if (!NT_SUCCESS(Status)) { + ContextRecord->Pc = 0; + if (SpForPacSign != NULL) { + *SpForPacSign = 0; + } + } + + return HandlerRoutine; +} #endif diff --git a/src/coreclr/vm/eetwain.cpp b/src/coreclr/vm/eetwain.cpp index 12261ea5d4d492..83959e213c4dce 100644 --- a/src/coreclr/vm/eetwain.cpp +++ b/src/coreclr/vm/eetwain.cpp @@ -788,6 +788,9 @@ void EECodeManager::EnsureCallerContextIsValid( PREGDISPLAY pRD, EECodeInfo * p *(pRD->pCallerContext) = 
*(pRD->pCurrentContext); // Skip updating context registers for light unwind Thread::VirtualUnwindCallFrame(pRD->pCallerContext, NULL, pCodeInfo); +#if defined(TARGET_ARM64) + pRD->CallerContextSpForPacSign = 0; +#endif // TARGET_ARM64 #endif } else @@ -795,7 +798,13 @@ void EECodeManager::EnsureCallerContextIsValid( PREGDISPLAY pRD, EECodeInfo * p // We need to make a copy here (instead of switching the pointers), in order to preserve the current context *(pRD->pCallerContext) = *(pRD->pCurrentContext); *(pRD->pCallerContextPointers) = *(pRD->pCurrentContextPointers); - Thread::VirtualUnwindCallFrame(pRD->pCallerContext, pRD->pCallerContextPointers, pCodeInfo); +#if defined(TARGET_ARM64) + pRD->CallerContextSpForPacSign = 0; +#endif // TARGET_ARM64 + Thread::VirtualUnwindCallFrame(pRD->pCallerContext, + pRD->pCallerContextPointers, + pCodeInfo + ARM64_ARG(&pRD->CallerContextSpForPacSign)); } pRD->IsCallerContextValid = TRUE; diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 80d6419535d476..479ec3318c1f1c 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6312,9 +6312,11 @@ bool IsIPInEpilog(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, BOOL *pSaf return fIsInEpilog; } -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) && !defined(TARGET_UNIX) // Read the PAC state for a managed ARM64 frame and, when PAC is enabled, recover the -// SP value that was live when PACIASP signed the return address in LR. +// SP value that was live when PACIASP signed the return address in LR. Unix ARM64 gets +// this directly from the PAL unwinder while it is unwinding the frame. This fallback +// is used by the Windows hijack path in threadsuspend.cpp. 
bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR retAddrLocation, TADDR *pSpForPacSign) { _ASSERTE(pContextToCheck != nullptr); @@ -6323,20 +6325,6 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re *pSpForPacSign = 0; - // In prolog or epilog while the current frame is still being established or torn down - // retrieving correct SP is complex. We conservatively bail-out in this case. - // TODO-PAC: Explore opportunities to retrieve SP while in prolog/epilog. - if (IsIPInProlog(pCodeInfo)) - { - return false; - } - - BOOL unused = TRUE; - if (IsIPInEpilog(pContextToCheck, pCodeInfo, &unused)) - { - return false; - } - // Lookup the function entry for the IP PTR_RUNTIME_FUNCTION FunctionEntry = pCodeInfo->GetFunctionEntry(); @@ -6548,7 +6536,7 @@ bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR re *pSpForPacSign = (TADDR)((SSIZE_T)retAddrLocation - (lrSlotOffset - pacSpOffset)); return true; } -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 && !TARGET_UNIX #endif // FEATURE_HIJACK && (!TARGET_X86 || TARGET_UNIX) diff --git a/src/coreclr/vm/excep.h b/src/coreclr/vm/excep.h index 97157b3566edee..8fb299a3955efd 100644 --- a/src/coreclr/vm/excep.h +++ b/src/coreclr/vm/excep.h @@ -29,9 +29,9 @@ BOOL AdjustContextForJITHelpers(EXCEPTION_RECORD *pExceptionRecord, CONTEXT *pCo // General purpose functions for use on an IP in jitted code. 
bool IsIPInProlog(EECodeInfo *pCodeInfo); bool IsIPInEpilog(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, BOOL *pSafeToInjectThreadAbort); -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) && !defined(TARGET_UNIX) bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR retAddrLocation, TADDR *pSpForPacSign); -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 && !TARGET_UNIX #endif // FEATURE_HIJACK && (!TARGET_X86 || TARGET_UNIX) // Enums diff --git a/src/coreclr/vm/stackwalk.cpp b/src/coreclr/vm/stackwalk.cpp index f8e344552c303e..8a79f4142af09f 100644 --- a/src/coreclr/vm/stackwalk.cpp +++ b/src/coreclr/vm/stackwalk.cpp @@ -365,6 +365,12 @@ UINT_PTR Thread::VirtualUnwindCallFrame(PREGDISPLAY pRD, EECodeInfo* pCodeInfo / pRD->pCurrentContextPointers = pRD->pCallerContextPointers; pRD->pCallerContextPointers = tempPtrs; +#if defined(TARGET_ARM64) + TADDR tempSpForPacSign = pRD->CurrentContextSpForPacSign; + pRD->CurrentContextSpForPacSign = pRD->CallerContextSpForPacSign; + pRD->CallerContextSpForPacSign = tempSpForPacSign; +#endif // TARGET_ARM64 + #ifdef TARGET_X86 pRD->PCTAddr = pRD->pCurrentContext->Esp - pCodeInfo->GetCodeManager()->GetStackParameterSize(pCodeInfo) - sizeof(DWORD); #endif @@ -388,7 +394,11 @@ UINT_PTR Thread::VirtualUnwindCallFrame(PREGDISPLAY pRD, EECodeInfo* pCodeInfo / pRD->pCurrentContext->Esp = pRD->SP; pRD->pCurrentContext->Eip = pRD->ControlPC; #else - VirtualUnwindCallFrame(pRD->pCurrentContext, pRD->pCurrentContextPointers, pCodeInfo); + ARM64_ONLY(pRD->CurrentContextSpForPacSign = 0;) + VirtualUnwindCallFrame(pRD->pCurrentContext, + pRD->pCurrentContextPointers, + pCodeInfo + ARM64_ARG(&pRD->CurrentContextSpForPacSign)); #endif } @@ -409,7 +419,8 @@ UINT_PTR Thread::VirtualUnwindCallFrame(PREGDISPLAY pRD, EECodeInfo* pCodeInfo / // static PCODE Thread::VirtualUnwindCallFrame(T_CONTEXT* pContext, T_KNONVOLATILE_CONTEXT_POINTERS* pContextPointers /*= NULL*/, - EECodeInfo * pCodeInfo /*= NULL*/) + EECodeInfo 
* pCodeInfo /*= NULL*/ + ARM64_ARG(TADDR * pSpForPacSign /*= NULL*/)) { #ifdef TARGET_WASM _ASSERTE("VirtualUnwindCallFrame is not supported on WebAssembly"); @@ -425,6 +436,10 @@ PCODE Thread::VirtualUnwindCallFrame(T_CONTEXT* pContext, } CONTRACTL_END; +#if defined(TARGET_ARM64) && !defined(TARGET_UNIX) + UNREFERENCED_PARAMETER(pSpForPacSign); +#endif // TARGET_ARM64 && !TARGET_UNIX + PCODE uControlPc = GetIP(pContext); #if !defined(DACCESS_COMPILE) @@ -520,6 +535,17 @@ PCODE Thread::VirtualUnwindCallFrame(T_CONTEXT* pContext, #endif // HOST_64BIT PVOID HandlerData; +#if defined(TARGET_UNIX) && defined(TARGET_ARM64) + RtlVirtualUnwindWithSpForPacSign(0, + uImageBase, + uControlPc, + pFunctionEntry, + pContext, + &HandlerData, + &EstablisherFrame, + pContextPointers, + (PULONG64)pSpForPacSign); +#else RtlVirtualUnwind(0, uImageBase, uControlPc, @@ -528,6 +554,7 @@ PCODE Thread::VirtualUnwindCallFrame(T_CONTEXT* pContext, &HandlerData, &EstablisherFrame, pContextPointers); +#endif uControlPc = GetIP(pContext); } diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index a0e7949fe0fd43..633931edbe0d38 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -2184,7 +2184,7 @@ class Thread void FillRegDisplay(const PREGDISPLAY pRD, PT_CONTEXT pctx, bool fLightUnwind = false); static PCODE VirtualUnwindCallFrame(T_CONTEXT* pContext, T_KNONVOLATILE_CONTEXT_POINTERS* pContextPointers = NULL, - EECodeInfo * pCodeInfo = NULL); + EECodeInfo * pCodeInfo = NULL ARM64_ARG(TADDR * pSpForPacSign = NULL)); static UINT_PTR VirtualUnwindCallFrame(PREGDISPLAY pRD, EECodeInfo * pCodeInfo = NULL); #ifndef DACCESS_COMPILE static PCODE VirtualUnwindLeafCallFrame(T_CONTEXT* pContext); diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 4c52f7a49e19c5..a30a8147930743 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -4752,6 +4752,9 @@ StackWalkAction SWCB_GetExecutionState(CrawlFrame *pCF, 
VOID *pData) pES->m_ppvRetAddrPtr = (void **) pRDT->pCallerContextPointers->Ra; #else pES->m_ppvRetAddrPtr = (void **) pRDT->pCallerContextPointers->Lr; +#if defined(TARGET_ARM64) && defined(TARGET_UNIX) + pES->m_pSpForPacSign = (void *)pRDT->CallerContextSpForPacSign; +#endif // TARGET_ARM64 && TARGET_UNIX #endif } #elif defined(TARGET_X86) @@ -5283,17 +5286,20 @@ BOOL Thread::HandledJITCase() X86_ONLY(ReturnKind returnKind;) X86_ONLY(bool hasAsyncRet;) - ARM64_ONLY(TADDR spForPacSign = 0;) if (GetReturnAddressHijackInfo(&codeInfo X86_ARG(&returnKind) X86_ARG(&hasAsyncRet))) { -#if defined(TARGET_ARM64) - if (!GetPacSignInfo(&ctx, &codeInfo, dac_cast(esb.m_ppvRetAddrPtr), &spForPacSign)) +#if defined(TARGET_ARM64) && !defined(TARGET_UNIX) + if (esb.m_pSpForPacSign == nullptr) { - return FALSE; - } + TADDR spForPacSign = 0; + if (!GetPacSignInfo(&ctx, &codeInfo, dac_cast(esb.m_ppvRetAddrPtr), &spForPacSign)) + { + return FALSE; + } - esb.m_pSpForPacSign = (PVOID)spForPacSign; -#endif // TARGET_ARM64 + esb.m_pSpForPacSign = (PVOID)spForPacSign; + } +#endif // TARGET_ARM64 && !TARGET_UNIX HijackThread(&esb X86_ARG(returnKind) X86_ARG(hasAsyncRet)); } } @@ -5844,15 +5850,18 @@ void HandleSuspensionForInterruptedThread(CONTEXT *interruptedContext) StackWalkerWalkingThreadHolder threadStackWalking(pThread); // Hijack the return address to point to the appropriate routine based on the method's return type. 
- ARM64_ONLY(TADDR spForPacSign = 0;) -#if defined(TARGET_ARM64) - if (!GetPacSignInfo(interruptedContext, &codeInfo, dac_cast(executionState.m_ppvRetAddrPtr), &spForPacSign)) +#if defined(TARGET_ARM64) && !defined(TARGET_UNIX) + if (executionState.m_pSpForPacSign == nullptr) { - return; - } + TADDR spForPacSign = 0; + if (!GetPacSignInfo(interruptedContext, &codeInfo, dac_cast(executionState.m_ppvRetAddrPtr), &spForPacSign)) + { + return; + } - executionState.m_pSpForPacSign = (PVOID)spForPacSign; -#endif // TARGET_ARM64 + executionState.m_pSpForPacSign = (PVOID)spForPacSign; + } +#endif // TARGET_ARM64 && !TARGET_UNIX pThread->HijackThread(&executionState X86_ARG(returnKind) X86_ARG(hasAsyncRet)); } } From 9a2a161276e3b20512936a18698ad427fecab2b6 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Wed, 20 May 2026 09:46:23 +0100 Subject: [PATCH 46/65] Restore missing THUMB_CODE clearing on Arm32 --- src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index 6adfaa7e14f1a9..3a4a264361d6de 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -1868,6 +1868,7 @@ void StackFrameIterator::NextInternal() // if the thread is safe to walk, it better not have a hijack in place. 
ASSERT(!m_pThread->IsHijacked()); + m_RegDisplay.SetIP(ReturnAddressToCanonicalPC(m_RegDisplay.GetIP())); SetControlPC(dac_cast(m_RegDisplay.GetIP())); PTR_VOID collapsingTargetFrame = NULL; From f4c7ec1b995543ea74797bfdc4b84b666c05e0e3 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 21 May 2026 13:05:38 +0100 Subject: [PATCH 47/65] Switch to use PAL unwinder for Windows to retrieve signing SP for PAC --- .../dlls/mscoree/coreclr/CMakeLists.txt | 4 + src/coreclr/inc/clrnt.h | 3 - src/coreclr/unwinder/CMakeLists.txt | 4 +- src/coreclr/unwinder/arm64/unwinder.cpp | 4 +- src/coreclr/vm/excep.cpp | 226 ------------------ src/coreclr/vm/excep.h | 3 - src/coreclr/vm/stackwalk.cpp | 6 +- src/coreclr/vm/threadsuspend.cpp | 29 +-- 8 files changed, 12 insertions(+), 267 deletions(-) diff --git a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt index 8beacca9ba017a..b40883fbba5fb2 100644 --- a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt +++ b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt @@ -63,6 +63,10 @@ if (CLR_CMAKE_HOST_UNIX AND NOT CLR_CMAKE_TARGET_ARCH_WASM) set(LIB_UNWINDER unwinder_wks) endif (CLR_CMAKE_HOST_UNIX AND NOT CLR_CMAKE_TARGET_ARCH_WASM) +if (CLR_CMAKE_HOST_WIN32 AND CLR_CMAKE_TARGET_ARCH_ARM64) + set(LIB_UNWINDER unwinder_wks) +endif (CLR_CMAKE_HOST_WIN32 AND CLR_CMAKE_TARGET_ARCH_ARM64) + # IMPORTANT! Please do not rearrange the order of the libraries. The linker on Linux is # order dependent and changing the order can result in undefined symbols in the shared # library. 
diff --git a/src/coreclr/inc/clrnt.h b/src/coreclr/inc/clrnt.h index 58252127ec1c16..d08ff08614b28c 100644 --- a/src/coreclr/inc/clrnt.h +++ b/src/coreclr/inc/clrnt.h @@ -365,9 +365,7 @@ RtlVirtualUnwind( IN OUT PKNONVOLATILE_CONTEXT_POINTERS ContextPointers OPTIONAL ); -#if defined(TARGET_UNIX) EXTERN_C -NTSYSAPI PEXCEPTION_ROUTINE NTAPI RtlVirtualUnwindWithSpForPacSign( @@ -381,7 +379,6 @@ RtlVirtualUnwindWithSpForPacSign( IN OUT PKNONVOLATILE_CONTEXT_POINTERS ContextPointers OPTIONAL, OUT PULONG64 SpForPacSign OPTIONAL ); -#endif // TARGET_UNIX // Mirror the XSTATE_ARM64_SVE flags from winnt.h diff --git a/src/coreclr/unwinder/CMakeLists.txt b/src/coreclr/unwinder/CMakeLists.txt index 1c82808f0366a4..dd5e9dd4a2e0e6 100644 --- a/src/coreclr/unwinder/CMakeLists.txt +++ b/src/coreclr/unwinder/CMakeLists.txt @@ -19,10 +19,10 @@ list(APPEND UNWINDER_SOURCES convert_to_absolute_path(UNWINDER_SOURCES ${UNWINDER_SOURCES}) -if(CLR_CMAKE_HOST_UNIX) +if(CLR_CMAKE_HOST_UNIX OR (CLR_CMAKE_HOST_WIN32 AND CLR_CMAKE_TARGET_ARCH_ARM64)) add_library_clr(unwinder_wks OBJECT ${UNWINDER_SOURCES}) add_dependencies(unwinder_wks eventing_headers) -endif(CLR_CMAKE_HOST_UNIX) +endif(CLR_CMAKE_HOST_UNIX OR (CLR_CMAKE_HOST_WIN32 AND CLR_CMAKE_TARGET_ARCH_ARM64)) add_library_clr(unwinder_dac ${UNWINDER_SOURCES}) add_dependencies(unwinder_dac eventing_headers) diff --git a/src/coreclr/unwinder/arm64/unwinder.cpp b/src/coreclr/unwinder/arm64/unwinder.cpp index e4d555eb08065a..15d9e46bcbeea5 100644 --- a/src/coreclr/unwinder/arm64/unwinder.cpp +++ b/src/coreclr/unwinder/arm64/unwinder.cpp @@ -2900,8 +2900,11 @@ RtlVirtualUnwind( return HandlerRoutine; } +#endif // HOST_UNIX +EXTERN_C PEXCEPTION_ROUTINE +NTAPI RtlVirtualUnwindWithSpForPacSign( IN ULONG HandlerType, IN ULONG64 ImageBase, @@ -2941,4 +2944,3 @@ RtlVirtualUnwindWithSpForPacSign( return HandlerRoutine; } -#endif diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 479ec3318c1f1c..5e87f606da0be9 100644 --- 
a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6312,232 +6312,6 @@ bool IsIPInEpilog(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, BOOL *pSaf return fIsInEpilog; } -#if defined(TARGET_ARM64) && !defined(TARGET_UNIX) -// Read the PAC state for a managed ARM64 frame and, when PAC is enabled, recover the -// SP value that was live when PACIASP signed the return address in LR. Unix ARM64 gets -// this directly from the PAL unwinder while it is unwinding the frame. This fallback -// is used by the Windows hijack path in threadsuspend.cpp. -bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR retAddrLocation, TADDR *pSpForPacSign) -{ - _ASSERTE(pContextToCheck != nullptr); - _ASSERTE(pCodeInfo->IsValid()); - _ASSERTE(pSpForPacSign != nullptr); - - *pSpForPacSign = 0; - - // Lookup the function entry for the IP - PTR_RUNTIME_FUNCTION FunctionEntry = pCodeInfo->GetFunctionEntry(); - - // We should always get a function entry for a managed method - _ASSERTE(FunctionEntry != NULL); - DWORD_PTR ImageBase = pCodeInfo->GetModuleBase(); - - _ASSERTE((FunctionEntry->UnwindData & 3) == 0); // Packed unwind data are not used with managed code - ULONG_PTR UnwindDataPtr = (ULONG_PTR)(ImageBase + FunctionEntry->UnwindData); - - // For unwind info layout details refer https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling?view=msvc-170#arm64-exception-handling-information - // Read the header word. - DWORD HeaderWord = *(DWORD*)UnwindDataPtr; - UnwindDataPtr += sizeof(DWORD); - - _ASSERTE(((HeaderWord >> 18) & 3) == 0); // Version 0 is the only supported version. 
- - ULONG UnwindWords = (HeaderWord >> 27) & 31; - ULONG EpilogScopeCount = (HeaderWord >> 22) & 31; - if (EpilogScopeCount == 0 && UnwindWords == 0) - { - DWORD extendedCounts = *(DWORD*)UnwindDataPtr; - UnwindDataPtr += sizeof(DWORD); - UnwindWords = (extendedCounts >> 16) & 0xFF; - EpilogScopeCount = extendedCounts & 0xFFFF; - } - - if ((HeaderWord & (1 << 21)) != 0) - { - EpilogScopeCount = 0; - } - - ULONG_PTR UnwindCodePtr = UnwindDataPtr + sizeof(DWORD) * EpilogScopeCount; - ULONG_PTR UnwindCodesEndPtr = UnwindCodePtr + sizeof(DWORD) * UnwindWords; - - auto GetUnwindOpSize = [](BYTE unwindCode) -> SIZE_T - { - if (unwindCode < 0xC0) - { - return 1; - } - else if (unwindCode < 0xE0) - { - return 2; - } - else - { - static const BYTE UnwindCodeSizeTable[32] = - { - 4,1,2,1,1,1,1,3, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 2,3,4,5,1,1,1,1 - }; - - return UnwindCodeSizeTable[unwindCode - 0xE0]; - } - }; - - ULONG_PTR* unwindOpStarts = (ULONG_PTR*)_alloca((UnwindCodesEndPtr - UnwindCodePtr) * sizeof(ULONG_PTR)); - ULONG unwindOpIndex = 0; - for (ULONG_PTR unwindOpPtr = UnwindCodePtr; unwindOpPtr < UnwindCodesEndPtr;) - { - BYTE curCode = *(BYTE*)unwindOpPtr; - if (curCode == 0xE4) // end - { - break; - } - - SIZE_T unwindOpSize = GetUnwindOpSize(curCode); - if ((unwindOpPtr + unwindOpSize) > UnwindCodesEndPtr) - { - return false; - } - - unwindOpStarts[unwindOpIndex++] = unwindOpPtr; - unwindOpPtr += unwindOpSize; - } - - SSIZE_T currentSpOffset = 0; - SSIZE_T lrSlotOffset = SSIZE_T_MIN; - SSIZE_T pacSpOffset = 0; - BOOL hasPacSignLR = false; - constexpr SSIZE_T PtrSize = 8; - - // ARM64 prolog unwind codes are stored in reverse prolog order. Replay them in prolog order so - // PACIASP/PACIBSP captures the SP that was live when LR was originally signed. 
- while (unwindOpIndex != 0) - { - UnwindCodePtr = unwindOpStarts[--unwindOpIndex]; - ULONG CurCode = *(BYTE*)UnwindCodePtr; - - if (((CurCode & 0xFC) == 0xC8) || // save_regp - ((CurCode & 0xFE) == 0xD8) || // save_fregp - ((CurCode & 0xFE) == 0xDC) || // save_freg - CurCode == 0xE1 || // set_fp - CurCode == 0xE2 || // add_fp - CurCode == 0xE3 || // nop - CurCode == 0xE5 || // end_c - CurCode == 0xE6) // save_next - { - continue; - } - - if ((CurCode & 0xE0) == 0x00) // alloc_s - { - currentSpOffset -= (CurCode & 0x1F) * 16; - continue; - } - - if ((CurCode & 0xE0) == 0x20) // save_r19r20_x - { - currentSpOffset -= (CurCode & 0x1F) * 8; - continue; - } - - if ((CurCode & 0xC0) == 0x40) // save_fplr - { - lrSlotOffset = currentSpOffset + ((CurCode & 0x3F) * 8) + PtrSize; - continue; - } - - if ((CurCode & 0xC0) == 0x80) // save_fplr_x - { - currentSpOffset -= ((CurCode & 0x3F) + 1) * 8; - lrSlotOffset = currentSpOffset + PtrSize; - continue; - } - - if ((CurCode & 0xF8) == 0xC0) // alloc_m - { - ULONG x = ((CurCode & 0x7) << 8) | *(BYTE*)(UnwindCodePtr + 1); - currentSpOffset -= x * 16; - continue; - } - - if (((CurCode & 0xFC) == 0xCC) || // save_regp_x - ((CurCode & 0xFE) == 0xDA)) // save_fregp_x - { - ULONG z = *(BYTE*)(UnwindCodePtr + 1) & 0x3F; - currentSpOffset -= (z + 1) * 8; - continue; - } - - if ((CurCode & 0xFC) == 0xD0) // save_reg - { - BYTE nextCode = *(BYTE*)(UnwindCodePtr + 1); - ULONG x = ((CurCode & 0x3) << 2) | (nextCode >> 6); - ULONG z = nextCode & 0x3F; - if (x == 11) // R30 / LR is the 12th GP register in the save_reg encodings - { - lrSlotOffset = currentSpOffset + z * 8; - } - - continue; - } - - if ((CurCode & 0xFE) == 0xD4) // save_reg_x - { - BYTE nextCode = *(BYTE*)(UnwindCodePtr + 1); - ULONG x = ((CurCode & 0x1) << 3) | (nextCode >> 5); - currentSpOffset -= ((nextCode & 0x1F) + 1) * 8; - if (x == 11) // R30 / LR is the 12th GP register in the save_reg encodings - { - lrSlotOffset = currentSpOffset; - } - - continue; - } - - if 
((CurCode & 0xFE) == 0xD6) // save_lrpair - { - ULONG z = *(BYTE*)(UnwindCodePtr + 1) & 0x3F; - lrSlotOffset = currentSpOffset + z * 8 + PtrSize; - continue; - } - - if (CurCode == 0xDE) // save_freg_x - { - ULONG z = *(BYTE*)(UnwindCodePtr + 1) & 0x1F; - currentSpOffset -= (z + 1) * 8; - continue; - } - - if (CurCode == 0xE0) // alloc_l - { - ULONG x = (*(BYTE*)(UnwindCodePtr + 1) << 16) | (*(BYTE*)(UnwindCodePtr + 2) << 8) | *(BYTE*)(UnwindCodePtr + 3); - currentSpOffset -= x * 16; - continue; - } - - if (CurCode == 0xFC) // pac_sign_lr - { - pacSpOffset = currentSpOffset; - hasPacSignLR = true; - continue; - } - - return false; - } - - if (!hasPacSignLR) - { - return true; - } - - if (lrSlotOffset == SSIZE_T_MIN) - { - return false; - } - - *pSpForPacSign = (TADDR)((SSIZE_T)retAddrLocation - (lrSlotOffset - pacSpOffset)); - return true; -} -#endif // TARGET_ARM64 && !TARGET_UNIX - #endif // FEATURE_HIJACK && (!TARGET_X86 || TARGET_UNIX) #define EXCEPTION_VISUALCPP_DEBUGGER ((DWORD) (1<<30 | 0x6D<<16 | 5000)) diff --git a/src/coreclr/vm/excep.h b/src/coreclr/vm/excep.h index 8fb299a3955efd..472782de483d33 100644 --- a/src/coreclr/vm/excep.h +++ b/src/coreclr/vm/excep.h @@ -29,9 +29,6 @@ BOOL AdjustContextForJITHelpers(EXCEPTION_RECORD *pExceptionRecord, CONTEXT *pCo // General purpose functions for use on an IP in jitted code. 
bool IsIPInProlog(EECodeInfo *pCodeInfo); bool IsIPInEpilog(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, BOOL *pSafeToInjectThreadAbort); -#if defined(TARGET_ARM64) && !defined(TARGET_UNIX) -bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR retAddrLocation, TADDR *pSpForPacSign); -#endif // TARGET_ARM64 && !TARGET_UNIX #endif // FEATURE_HIJACK && (!TARGET_X86 || TARGET_UNIX) // Enums diff --git a/src/coreclr/vm/stackwalk.cpp b/src/coreclr/vm/stackwalk.cpp index 8a79f4142af09f..7ea2bdc91c7aa3 100644 --- a/src/coreclr/vm/stackwalk.cpp +++ b/src/coreclr/vm/stackwalk.cpp @@ -436,10 +436,6 @@ PCODE Thread::VirtualUnwindCallFrame(T_CONTEXT* pContext, } CONTRACTL_END; -#if defined(TARGET_ARM64) && !defined(TARGET_UNIX) - UNREFERENCED_PARAMETER(pSpForPacSign); -#endif // TARGET_ARM64 && !TARGET_UNIX - PCODE uControlPc = GetIP(pContext); #if !defined(DACCESS_COMPILE) @@ -535,7 +531,7 @@ PCODE Thread::VirtualUnwindCallFrame(T_CONTEXT* pContext, #endif // HOST_64BIT PVOID HandlerData; -#if defined(TARGET_UNIX) && defined(TARGET_ARM64) +#if defined(TARGET_ARM64) RtlVirtualUnwindWithSpForPacSign(0, uImageBase, uControlPc, diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index a30a8147930743..9fc9c5cd44b08d 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -4752,9 +4752,9 @@ StackWalkAction SWCB_GetExecutionState(CrawlFrame *pCF, VOID *pData) pES->m_ppvRetAddrPtr = (void **) pRDT->pCallerContextPointers->Ra; #else pES->m_ppvRetAddrPtr = (void **) pRDT->pCallerContextPointers->Lr; -#if defined(TARGET_ARM64) && defined(TARGET_UNIX) +#if defined(TARGET_ARM64) pES->m_pSpForPacSign = (void *)pRDT->CallerContextSpForPacSign; -#endif // TARGET_ARM64 && TARGET_UNIX +#endif // TARGET_ARM64 #endif } #elif defined(TARGET_X86) @@ -5288,18 +5288,6 @@ BOOL Thread::HandledJITCase() X86_ONLY(bool hasAsyncRet;) if (GetReturnAddressHijackInfo(&codeInfo X86_ARG(&returnKind) 
X86_ARG(&hasAsyncRet))) { -#if defined(TARGET_ARM64) && !defined(TARGET_UNIX) - if (esb.m_pSpForPacSign == nullptr) - { - TADDR spForPacSign = 0; - if (!GetPacSignInfo(&ctx, &codeInfo, dac_cast(esb.m_ppvRetAddrPtr), &spForPacSign)) - { - return FALSE; - } - - esb.m_pSpForPacSign = (PVOID)spForPacSign; - } -#endif // TARGET_ARM64 && !TARGET_UNIX HijackThread(&esb X86_ARG(returnKind) X86_ARG(hasAsyncRet)); } } @@ -5849,19 +5837,6 @@ void HandleSuspensionForInterruptedThread(CONTEXT *interruptedContext) // This is necessary to allow the signature parsing functions to work without triggering any loads. StackWalkerWalkingThreadHolder threadStackWalking(pThread); - // Hijack the return address to point to the appropriate routine based on the method's return type. -#if defined(TARGET_ARM64) && !defined(TARGET_UNIX) - if (executionState.m_pSpForPacSign == nullptr) - { - TADDR spForPacSign = 0; - if (!GetPacSignInfo(interruptedContext, &codeInfo, dac_cast(executionState.m_ppvRetAddrPtr), &spForPacSign)) - { - return; - } - - executionState.m_pSpForPacSign = (PVOID)spForPacSign; - } -#endif // TARGET_ARM64 && !TARGET_UNIX pThread->HijackThread(&executionState X86_ARG(returnKind) X86_ARG(hasAsyncRet)); } } From 82a8ec442fc78dead3e17124cac73c453564cfdd Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 21 May 2026 14:14:57 +0100 Subject: [PATCH 48/65] Try fixing build failures on Windows --- src/coreclr/inc/clrnt.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/inc/clrnt.h b/src/coreclr/inc/clrnt.h index d08ff08614b28c..0dba5f47fba535 100644 --- a/src/coreclr/inc/clrnt.h +++ b/src/coreclr/inc/clrnt.h @@ -372,11 +372,11 @@ RtlVirtualUnwindWithSpForPacSign( IN ULONG HandlerType, IN ULONG64 ImageBase, IN ULONG64 ControlPc, - IN PRUNTIME_FUNCTION FunctionEntry, - IN OUT PCONTEXT ContextRecord, + IN PT_RUNTIME_FUNCTION FunctionEntry, + IN OUT PT_CONTEXT ContextRecord, OUT PVOID *HandlerData, OUT PULONG64 EstablisherFrame, - IN OUT 
PKNONVOLATILE_CONTEXT_POINTERS ContextPointers OPTIONAL, + IN OUT PT_KNONVOLATILE_CONTEXT_POINTERS ContextPointers OPTIONAL, OUT PULONG64 SpForPacSign OPTIONAL ); From e44b17ab724fee2fd0d9174c97fed66a9c624fa6 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 28 May 2026 15:53:07 +0100 Subject: [PATCH 49/65] Update JIT GUID temporarily to enable SPMI tests --- src/coreclr/inc/jiteeversionguid.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 9816fd7d1744fa..57c688f8516331 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -37,11 +37,11 @@ #include -constexpr GUID JITEEVersionIdentifier = { /* fc5f63e7-921b-4091-b920-8df8d7b872c1 */ - 0xfc5f63e7, - 0x921b, - 0x4091, - {0xb9, 0x20, 0x8d, 0xf8, 0xd7, 0xb8, 0x72, 0xc1} +constexpr GUID JITEEVersionIdentifier = { /* 91460d2c-d6a8-460b-a95f-8342301bebe8 */ + 0x91460d2c, + 0xd6a8, + 0x460b, + {0xa9, 0x5f, 0x83, 0x42, 0x30, 0x1b, 0xeb, 0xe8} }; #endif // JIT_EE_VERSIONING_GUID_H From 3ef9668efe00565278968863089b14103ea78db3 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Fri, 29 May 2026 12:18:01 +0100 Subject: [PATCH 50/65] Address review comments --- src/coreclr/debug/ee/controller.cpp | 4 ---- src/coreclr/vm/arm64/asmhelpers.S | 2 +- src/coreclr/vm/arm64/asmhelpers.asm | 2 +- src/coreclr/vm/arm64/cgencpu.h | 4 +++- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/coreclr/debug/ee/controller.cpp b/src/coreclr/debug/ee/controller.cpp index 8f86df4227d464..52a3fbe68d7024 100644 --- a/src/coreclr/debug/ee/controller.cpp +++ b/src/coreclr/debug/ee/controller.cpp @@ -19,10 +19,6 @@ #include "../../vm/methoditer.h" #include "../../vm/tailcallhelp.h" -#if defined(TARGET_ARM64) -extern "C" void* PacStripPtr(void* ptr); -#endif // TARGET_ARM64 - const char *GetTType( TraceType tt); #define IsSingleStep(exception) ((exception) == EXCEPTION_SINGLE_STEP) diff
--git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 30b624a1f7309e..f4b2cd1b9d8ec1 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -147,7 +147,7 @@ NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler EPILOG_RESTORE_REG_PAIR x25, x26, 64 EPILOG_RESTORE_REG_PAIR x27, x28, 80 EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 192 - xpaclri + autiasp EPILOG_RETURN NESTED_END OnHijackTripThread, _TEXT diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 967fd066e3b947..6a6a462094c9e5 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -312,7 +312,7 @@ NoFloatingPointRetVal EPILOG_RESTORE_REG_PAIR x27, x28, #80 EPILOG_RESTORE_REG_PAIR fp, lr, #192! - DCD 0xD50320FF ; xpaclri instruction in binary to avoid error while compiling with non-PAC enabled compilers + DCD 0xD50323FF ; autibsp EPILOG_RETURN NESTED_END diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index 802fcce7af29a7..1e14347ad553b0 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -47,6 +47,8 @@ extern PCODE GetPreStubEntryPoint(); #ifndef DACCESS_COMPILE extern "C" void* PacAuthPtr(void* ptr, void* sp); +extern "C" void* PacSignPtr(void* ptr, void* sp); +extern "C" void* PacStripPtr(void* ptr); #endif #define STACK_ALIGN_SIZE 16 @@ -212,7 +214,7 @@ typedef struct _PROFILE_PLATFORM_SPECIFIC_DATA inline PCODE GetIP(const T_CONTEXT * context) { LIMITED_METHOD_DAC_CONTRACT; - return (PCODE) context->Pc; + return context->Pc; } inline void SetIP(T_CONTEXT *context, PCODE eip) { From a66dd10e1a377c6d8f20d43be894cb520af59bf9 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Fri, 29 May 2026 15:57:44 +0100 Subject: [PATCH 51/65] Pass signing sp for managed frame to hijack frame --- src/coreclr/vm/arm64/asmhelpers.S | 11 +++++++---- src/coreclr/vm/arm64/asmhelpers.asm | 10 ++++++---- src/coreclr/vm/arm64/cgencpu.h 
| 1 + src/coreclr/vm/threadsuspend.cpp | 3 +++ 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index f4b2cd1b9d8ec1..2ab634f01092e4 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -109,8 +109,9 @@ NESTED_END TheUMEntryPrestub, _TEXT #ifdef FEATURE_HIJACK // ------------------------------------------------------------------ // Hijack function for functions which return a scalar type or a struct (value type) +.arch_extension pauth NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler - PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -192 + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -208 // Spill callee saved registers PROLOG_SAVE_REG_PAIR x19, x20, 16 PROLOG_SAVE_REG_PAIR x21, x22, 32 @@ -146,8 +147,11 @@ NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler EPILOG_RESTORE_REG_PAIR x23, x24, 48 EPILOG_RESTORE_REG_PAIR x25, x26, 64 EPILOG_RESTORE_REG_PAIR x27, x28, 80 - EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 192 - autiasp + ldr x16, [sp, #192] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 208 + cbz x16, LOCAL_LABEL(HijackReturn) + autia lr, x16 +LOCAL_LABEL(HijackReturn): EPILOG_RETURN NESTED_END OnHijackTripThread, _TEXT @@ -3548,4 +3552,3 @@ NESTED_ENTRY IL_Rethrow, _TEXT, NoHandler // Should never return brk #0 NESTED_END IL_Rethrow, _TEXT - diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 6a6a462094c9e5..67374ef910068b 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -274,7 +274,7 @@ NoFloatingPointRetVal ; ------------------------------------------------------------------ ; Hijack function for functions which return a scalar type or a struct (value type) NESTED_ENTRY OnHijackTripThread - PROLOG_SAVE_REG_PAIR fp, lr, #-192! + PROLOG_SAVE_REG_PAIR fp, lr, #-208! 
; Spill callee saved registers PROLOG_SAVE_REG_PAIR x19, x20, #16 PROLOG_SAVE_REG_PAIR x21, x22, #32 @@ -310,9 +310,11 @@ NoFloatingPointRetVal EPILOG_RESTORE_REG_PAIR x23, x24, #48 EPILOG_RESTORE_REG_PAIR x25, x26, #64 EPILOG_RESTORE_REG_PAIR x27, x28, #80 - EPILOG_RESTORE_REG_PAIR fp, lr, #192! - - DCD 0xD50323FF ; autibsp + ldr x16, [sp, #192] + EPILOG_RESTORE_REG_PAIR fp, lr, #208! + cbz x16, OnHijackTripThreadReturn + DCD 0xDAC1161E ; autib lr, x16 instruction in binary to avoid requiring PAC-enabled assemblers +OnHijackTripThreadReturn EPILOG_RETURN NESTED_END diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index 1e14347ad553b0..b00906f3e9fecc 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -530,6 +530,7 @@ struct HijackArgs }; NEON128 FPReturnValue[4]; }; + DWORD64 SpForPacSign; }; #endif // __cgencpu_h__ diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 9fc9c5cd44b08d..c0caa31784a7a6 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -4840,6 +4840,9 @@ void STDCALL OnHijackWorker(HijackArgs * pArgs) // authenticates the return address on demand for stackwalk/GC, but // OnHijackTripThread will later return via the saved LR in HijackArgs. pArgs->ReturnAddress = (size_t)thread->m_pvHJRetAddr; +#if defined(TARGET_ARM64) + pArgs->SpForPacSign = (size_t)thread->m_pSpForPacSign; +#endif // TARGET_ARM64 // Build a frame so that stack crawling can proceed from here back to where // we will resume execution. 
From d539d9d682f052a3643f2d3dce49091d691c88b3 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Mon, 1 Jun 2026 11:29:42 +0100 Subject: [PATCH 52/65] Strip return address in cdac unwinder --- src/coreclr/unwinder/arm64/unwinder.cpp | 4 ---- src/coreclr/vm/arm64/asmhelpers.S | 8 ++------ src/coreclr/vm/arm64/asmhelpers.asm | 8 ++------ .../Contracts/StackWalk/Context/ARM64/ARM64Unwinder.cs | 4 +--- 4 files changed, 5 insertions(+), 19 deletions(-) diff --git a/src/coreclr/unwinder/arm64/unwinder.cpp b/src/coreclr/unwinder/arm64/unwinder.cpp index 15d9e46bcbeea5..828fe3fa2d5391 100644 --- a/src/coreclr/unwinder/arm64/unwinder.cpp +++ b/src/coreclr/unwinder/arm64/unwinder.cpp @@ -2412,10 +2412,6 @@ Return Value: } HANDLE_PAC(&ContextRecord->Lr, ContextRecord->Sp); - - // - // TODO: Implement support for UnwindFlags RTL_VIRTUAL_UNWIND2_VALIDATE_PAC. - // } // diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 2ab634f01092e4..880849df5c4564 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -167,9 +167,7 @@ NESTED_END OnHijackTripThread, _TEXT LEAF_END PacStripPtr, _TEXT // void* PacSignPtr(void *, void *); -// This function signs the input pointer using x1 as salt. -// To avoid failing on non-PAC enabled machines, we use pacia1716 which signs lr explicitly. -// Thus we need to move input in lr, sign it and then copy it back to the result register. +// This function signs the input pointer using x1 as salt. It is a no-op on non-PAC enabled machines. .arch_extension pauth LEAF_ENTRY PacSignPtr, _TEXT mov x17, x0 @@ -180,9 +178,7 @@ NESTED_END OnHijackTripThread, _TEXT LEAF_END PacSignPtr, _TEXT // void* PacAuthPtr(void *, void *); -// This function authenticates the input signed-pointer using x1 as salt. -// To avoid failing on non-PAC enabled machines, we use pacia1716 which authenticates lr explicitly. 
-// Thus we need to move input in lr, authenticate it and then copy it back to the result register. +// This function authenticates the input signed-pointer using x1 as salt. It is a no-op on non-PAC enabled machines. .arch_extension pauth LEAF_ENTRY PacAuthPtr, _TEXT mov x17, x0 diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 67374ef910068b..429037381b0040 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -329,9 +329,7 @@ OnHijackTripThreadReturn LEAF_END PacStripPtr ; void* PacSignPtr(void *, void *); -; This function signs the input pointer using x1 as salt. -; To avoid failing on non-PAC enabled machines, we use pacib1716 which signs lr explicitly. -; Thus we need to move input in lr, sign it and then copy it back to the result register. +; This function signs the input pointer using x1 as salt. It is a no-op on non-PAC enabled machines. LEAF_ENTRY PacSignPtr mov x17, x0 mov x16, x1 @@ -341,9 +339,7 @@ OnHijackTripThreadReturn LEAF_END PacSignPtr ; void* PacAuthPtr(void *, void *); -; This function authenticates the input signed-pointer using x1 as salt. -; To avoid failing on non-PAC enabled machines, we use autib1716 which authenticates lr explicitly. -; Thus we need to move input in lr, authenticate it and then copy it back to the result register. +; This function authenticates the input signed-pointer using x1 as salt. It is a no-op on non-PAC enabled machines. 
LEAF_ENTRY PacAuthPtr mov x17, x0 mov x16, x1 diff --git a/src/native/managed/cdac/Microsoft.Diagnostics.DataContractReader.Contracts/Contracts/StackWalk/Context/ARM64/ARM64Unwinder.cs b/src/native/managed/cdac/Microsoft.Diagnostics.DataContractReader.Contracts/Contracts/StackWalk/Context/ARM64/ARM64Unwinder.cs index 625f972958ca23..a2b59cd4a2c0af 100644 --- a/src/native/managed/cdac/Microsoft.Diagnostics.DataContractReader.Contracts/Contracts/StackWalk/Context/ARM64/ARM64Unwinder.cs +++ b/src/native/managed/cdac/Microsoft.Diagnostics.DataContractReader.Contracts/Contracts/StackWalk/Context/ARM64/ARM64Unwinder.cs @@ -763,9 +763,7 @@ private bool VirtualUnwindFull( return false; } - // - // TODO: Implement support for UnwindFlags RTL_VIRTUAL_UNWIND2_VALIDATE_PAC. - // + context.Lr &= 0x0000FFFFFFFFFFFF; } // From 88b9eb2379361b33097f16e5af16208eb2eba1c8 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Mon, 1 Jun 2026 11:59:45 +0100 Subject: [PATCH 53/65] Handle RTL_VIRTUAL_UNWIND2_VALIDATE_PAC in unwinder --- src/coreclr/unwinder/arm64/unwinder.cpp | 34 ++++++++++++++++--------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/src/coreclr/unwinder/arm64/unwinder.cpp b/src/coreclr/unwinder/arm64/unwinder.cpp index 828fe3fa2d5391..71334f6c96b9bc 100644 --- a/src/coreclr/unwinder/arm64/unwinder.cpp +++ b/src/coreclr/unwinder/arm64/unwinder.cpp @@ -33,6 +33,7 @@ #if !defined(DACCESS_COMPILE) extern "C" void* PacAuthPtr(void* ptr, void* sp); +extern "C" void* PacStripPtr(void* ptr); #endif // !defined(DACCESS_COMPILE) #ifdef HOST_UNIX @@ -256,10 +257,11 @@ do { #endif // !defined(DEBUGGER_UNWIND) -// Macros for stripping pointer authentication (PAC) bits. +// Macros for handling pointer authentication (PAC) bits. 
#if !defined(DACCESS_COMPILE) #define HANDLE_PAC(pointer, sp) RtlHandlePacOnline(pointer, sp) +#define STRIP_PAC(pointer) RtlStripPacOnline(pointer) FORCEINLINE VOID RtlHandlePacOnline(_Inout_ PULONG64 Pointer, _In_ ULONG64 Sp) @@ -268,9 +270,9 @@ VOID RtlHandlePacOnline(_Inout_ PULONG64 Pointer, _In_ ULONG64 Sp) Routine Description: - This routine authenticates an ARM64 pointer authenticated with PACIASP - using the supplied stack pointer as the modifier. Hence this should only - be called when authenticating a pointer at runtime (not debugger). + This routine authenticates an ARM64 pointer using the supplied stack pointer + as the modifier. Hence this should only be called when authenticating a + pointer at runtime (not debugger). Arguments: @@ -287,15 +289,22 @@ Return Value: { *Pointer = (ULONG64)PacAuthPtr((void *)(*Pointer), (void *)Sp); } + +FORCEINLINE +VOID RtlStripPacOnline(_Inout_ PULONG64 Pointer) +{ + *Pointer = (ULONG64)PacStripPtr((void *)(*Pointer)); +} + #else -#define HANDLE_PAC(pointer, sp) RtlStripPacManual(pointer, sp) +#define HANDLE_PAC(pointer, sp) RtlStripPacManual(pointer) +#define STRIP_PAC(pointer) RtlStripPacManual(pointer) FORCEINLINE VOID RtlStripPacManual( - _Inout_ PULONG64 Pointer, - _In_ ULONG64 Sp + _Inout_ PULONG64 Pointer ) /*++ @@ -319,7 +328,6 @@ Return Value: --*/ { - UNREFERENCED_PARAMETER(Sp); *Pointer &= 0x0000FFFFFFFFFFFF; return; } @@ -1787,8 +1795,6 @@ Return Value: ULONG UnwindIndex; ULONG UnwindWords; - UNREFERENCED_PARAMETER(UnwindFlags); - // // Unless a special frame is encountered, assume that any unwinding // will return us to the return address of a call and set the flag @@ -2411,7 +2417,11 @@ Return Value: *UnwindParams->SpForPacSign = ContextRecord->Sp; } - HANDLE_PAC(&ContextRecord->Lr, ContextRecord->Sp); + if ((UnwindFlags & RTL_VIRTUAL_UNWIND2_VALIDATE_PAC) != 0) { + HANDLE_PAC(&ContextRecord->Lr, ContextRecord->Sp); + } else { + STRIP_PAC(&ContextRecord->Lr); + } } // @@ -2929,7 +2939,7 @@ 
RtlVirtualUnwindWithSpForPacSign( NULL, &HandlerRoutine, (PULONG_PTR)SpForPacSign, - 0); + RTL_VIRTUAL_UNWIND2_VALIDATE_PAC); if (!NT_SUCCESS(Status)) { ContextRecord->Pc = 0; From 363156e15ef7c385dda2fe1be98f5100322fcc61 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Mon, 1 Jun 2026 12:04:31 +0100 Subject: [PATCH 54/65] Revert "Handle RTL_VIRTUAL_UNWIND2_VALIDATE_PAC in unwinder" This reverts commit 88b9eb2379361b33097f16e5af16208eb2eba1c8. --- src/coreclr/unwinder/arm64/unwinder.cpp | 38 +++++++++++-------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/src/coreclr/unwinder/arm64/unwinder.cpp b/src/coreclr/unwinder/arm64/unwinder.cpp index 71334f6c96b9bc..15d9e46bcbeea5 100644 --- a/src/coreclr/unwinder/arm64/unwinder.cpp +++ b/src/coreclr/unwinder/arm64/unwinder.cpp @@ -33,7 +33,6 @@ #if !defined(DACCESS_COMPILE) extern "C" void* PacAuthPtr(void* ptr, void* sp); -extern "C" void* PacStripPtr(void* ptr); #endif // !defined(DACCESS_COMPILE) #ifdef HOST_UNIX @@ -257,11 +256,10 @@ do { #endif // !defined(DEBUGGER_UNWIND) -// Macros for handling pointer authentication (PAC) bits. +// Macros for stripping pointer authentication (PAC) bits. #if !defined(DACCESS_COMPILE) #define HANDLE_PAC(pointer, sp) RtlHandlePacOnline(pointer, sp) -#define STRIP_PAC(pointer) RtlStripPacOnline(pointer) FORCEINLINE VOID RtlHandlePacOnline(_Inout_ PULONG64 Pointer, _In_ ULONG64 Sp) @@ -270,9 +268,9 @@ VOID RtlHandlePacOnline(_Inout_ PULONG64 Pointer, _In_ ULONG64 Sp) Routine Description: - This routine authenticates an ARM64 pointer using the supplied stack pointer - as the modifier. Hence this should only be called when authenticating a - pointer at runtime (not debugger). + This routine authenticates an ARM64 pointer authenticated with PACIASP + using the supplied stack pointer as the modifier. Hence this should only + be called when authenticating a pointer at runtime (not debugger). 
Arguments: @@ -289,22 +287,15 @@ Return Value: { *Pointer = (ULONG64)PacAuthPtr((void *)(*Pointer), (void *)Sp); } - -FORCEINLINE -VOID RtlStripPacOnline(_Inout_ PULONG64 Pointer) -{ - *Pointer = (ULONG64)PacStripPtr((void *)(*Pointer)); -} - #else -#define HANDLE_PAC(pointer, sp) RtlStripPacManual(pointer) -#define STRIP_PAC(pointer) RtlStripPacManual(pointer) +#define HANDLE_PAC(pointer, sp) RtlStripPacManual(pointer, sp) FORCEINLINE VOID RtlStripPacManual( - _Inout_ PULONG64 Pointer + _Inout_ PULONG64 Pointer, + _In_ ULONG64 Sp ) /*++ @@ -328,6 +319,7 @@ Return Value: --*/ { + UNREFERENCED_PARAMETER(Sp); *Pointer &= 0x0000FFFFFFFFFFFF; return; } @@ -1795,6 +1787,8 @@ Return Value: ULONG UnwindIndex; ULONG UnwindWords; + UNREFERENCED_PARAMETER(UnwindFlags); + // // Unless a special frame is encountered, assume that any unwinding // will return us to the return address of a call and set the flag @@ -2417,11 +2411,11 @@ Return Value: *UnwindParams->SpForPacSign = ContextRecord->Sp; } - if ((UnwindFlags & RTL_VIRTUAL_UNWIND2_VALIDATE_PAC) != 0) { - HANDLE_PAC(&ContextRecord->Lr, ContextRecord->Sp); - } else { - STRIP_PAC(&ContextRecord->Lr); - } + HANDLE_PAC(&ContextRecord->Lr, ContextRecord->Sp); + + // + // TODO: Implement support for UnwindFlags RTL_VIRTUAL_UNWIND2_VALIDATE_PAC. 
+ // } // @@ -2939,7 +2933,7 @@ RtlVirtualUnwindWithSpForPacSign( NULL, &HandlerRoutine, (PULONG_PTR)SpForPacSign, - RTL_VIRTUAL_UNWIND2_VALIDATE_PAC); + 0); if (!NT_SUCCESS(Status)) { ContextRecord->Pc = 0; From d5a1b3cfed36518bc56d3a5592e449d6ca83d0d2 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Tue, 2 Jun 2026 10:00:46 +0100 Subject: [PATCH 55/65] Use ARM64_ARG macro for Arm64 specific args in HijackFrame --- src/coreclr/vm/frames.h | 6 +----- src/coreclr/vm/threadsuspend.cpp | 12 ++---------- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/src/coreclr/vm/frames.h b/src/coreclr/vm/frames.h index 84534490fb47a6..fa6a8df59a4799 100644 --- a/src/coreclr/vm/frames.h +++ b/src/coreclr/vm/frames.h @@ -1397,11 +1397,7 @@ class HijackFrame : public Frame // HijackFrames are created by trip functions. See OnHijackTripThread() // They are real C++ objects on the stack. // So, it's a public function -- but that doesn't mean you should make some. - HijackFrame(LPVOID returnAddress, Thread *thread, HijackArgs *args -#if defined(TARGET_ARM64) - , LPVOID spForPacSign -#endif - ); + HijackFrame(LPVOID returnAddress, Thread *thread, HijackArgs *args ARM64_ARG(LPVOID spForPacSign)); protected: diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index c0caa31784a7a6..af977498de6425 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -4796,11 +4796,7 @@ StackWalkAction SWCB_GetExecutionState(CrawlFrame *pCF, VOID *pData) return action; } -HijackFrame::HijackFrame(LPVOID returnAddress, Thread *thread, HijackArgs *args -#if defined(TARGET_ARM64) - , LPVOID spForPacSign -#endif - ) +HijackFrame::HijackFrame(LPVOID returnAddress, Thread *thread, HijackArgs *args ARM64_ARG(LPVOID spForPacSign)) : Frame(FrameIdentifier::HijackFrame), m_ReturnAddress((TADDR)returnAddress), #if defined(TARGET_ARM64) @@ -4846,11 +4842,7 @@ void STDCALL OnHijackWorker(HijackArgs * pArgs) // Build a frame so that stack 
crawling can proceed from here back to where // we will resume execution. - HijackFrame frame(thread->m_pvHJRetAddr, thread, pArgs -#if defined(TARGET_ARM64) - , thread->m_pSpForPacSign -#endif - ); + HijackFrame frame(thread->m_pvHJRetAddr, thread, pArgs ARM64_ARG(thread->m_pSpForPacSign)); #ifdef _DEBUG BOOL GCOnTransition = FALSE; From b3ffd12b0bc351e8ff495605e3468246c872c8b3 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Wed, 3 Jun 2026 15:35:51 +0100 Subject: [PATCH 56/65] Switch to PAL unwinder for NativeAOT on Windows --- src/coreclr/nativeaot/Runtime/CMakeLists.txt | 8 + .../Runtime/windows/CoffNativeCodeManager.cpp | 256 ++---------------- 2 files changed, 27 insertions(+), 237 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt index e491f54a562445..e59b60c932c738 100644 --- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/CMakeLists.txt @@ -132,6 +132,14 @@ if (WIN32) list(APPEND FULL_RUNTIME_SOURCES windows/CoffNativeCodeManager.cpp) + if(CLR_CMAKE_TARGET_ARCH_ARM64) + list(APPEND FULL_RUNTIME_SOURCES + ../../unwinder/arm64/unwinder.cpp + ) + include_directories(../../unwinder) + include_directories(../../unwinder/arm64) + endif() + set(ASM_SUFFIX asm) else() diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp index ff5a3b4dd0f1cb..53eee6f20122ec 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -24,6 +24,17 @@ #if defined(TARGET_ARM64) extern "C" void* PacStripPtr(void* ptr); +EXTERN_C PEXCEPTION_ROUTINE NTAPI RtlVirtualUnwindWithSpForPacSign( + IN ULONG HandlerType, + IN ULONG64 ImageBase, + IN ULONG64 ControlPc, + IN PRUNTIME_FUNCTION FunctionEntry, + IN OUT PCONTEXT ContextRecord, + OUT PVOID *HandlerData, + OUT PULONG64 EstablisherFrame, + IN OUT 
PKNONVOLATILE_CONTEXT_POINTERS ContextPointers OPTIONAL, + OUT PULONG64 SpForPacSign OPTIONAL + ); #endif // TARGET_ARM64 #ifdef TARGET_X86 @@ -843,236 +854,6 @@ bool CoffNativeCodeManager::IsUnwindable(PTR_VOID pvAddress) return true; } -#if defined(TARGET_ARM64) -static bool TryGetSpForPacSigning(PTR_VOID pUnwindDataBlob, - size_t unwindDataBlobSize, - PTR_PTR_VOID ppvRetAddrLocation, - TADDR *pSpForPacSign) -{ - ASSERT(pSpForPacSign != nullptr); - - *pSpForPacSign = 0; - - //TODO-PAC: Bail out in prolog and epilog for consistency with GetPacSignInfo() in JIT - - ASSERT(unwindDataBlobSize >= sizeof(DWORD)); - - PTR_uint8_t unwindDataPtr = dac_cast(pUnwindDataBlob); - PTR_uint8_t unwindDataEndPtr = unwindDataPtr + unwindDataBlobSize; - - // For unwind info layout details refer https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling?view=msvc-170#arm64-exception-handling-information - // Read the header word. - DWORD HeaderWord = *dac_cast((uint8_t*)unwindDataPtr); - unwindDataPtr += sizeof(DWORD); - - ASSERT(((HeaderWord >> 18) & 3) == 0); // Version 0 is the only supported version. 
- - ULONG UnwindWords = (HeaderWord >> 27) & 31; - ULONG EpilogScopeCount = (HeaderWord >> 22) & 31; - if (EpilogScopeCount == 0 && UnwindWords == 0) - { - if ((unwindDataPtr + sizeof(DWORD)) > unwindDataEndPtr) - { - return false; - } - - DWORD extendedCounts = *dac_cast((uint8_t*)unwindDataPtr); - unwindDataPtr += sizeof(DWORD); - UnwindWords = (extendedCounts >> 16) & 0xFF; - EpilogScopeCount = extendedCounts & 0xFFFF; - } - - if ((HeaderWord & (1 << 21)) != 0) - { - EpilogScopeCount = 0; - } - - if ((unwindDataPtr + (EpilogScopeCount * sizeof(DWORD)) + (UnwindWords * sizeof(DWORD))) > unwindDataEndPtr) - { - return false; - } - - PTR_uint8_t UnwindCodePtr = unwindDataPtr + (EpilogScopeCount * sizeof(DWORD)); - PTR_uint8_t UnwindCodesEndPtr = UnwindCodePtr + (UnwindWords * sizeof(DWORD)); - - auto GetUnwindOpSize = [](BYTE unwindCode) -> SIZE_T - { - if (unwindCode < 0xC0) - { - return 1; - } - else if (unwindCode < 0xE0) - { - return 2; - } - else - { - static const BYTE UnwindCodeSizeTable[32] = - { - 4,1,2,1,1,1,1,3, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 2,3,4,5,1,1,1,1 - }; - - return UnwindCodeSizeTable[unwindCode - 0xE0]; - } - }; - - TADDR* unwindOpStarts = (TADDR*)_alloca((UnwindCodesEndPtr - UnwindCodePtr) * sizeof(TADDR)); - ULONG unwindOpIndex = 0; - for (PTR_uint8_t unwindOpPtr = UnwindCodePtr; unwindOpPtr < UnwindCodesEndPtr;) - { - BYTE curCode = *unwindOpPtr; - if (curCode == 0xE4) // end - { - break; - } - - SIZE_T unwindOpSize = GetUnwindOpSize(curCode); - if ((unwindOpPtr + unwindOpSize) > UnwindCodesEndPtr) - { - return false; - } - - unwindOpStarts[unwindOpIndex++] = dac_cast(unwindOpPtr); - unwindOpPtr += unwindOpSize; - } - - SSIZE_T currentSpOffset = 0; - SSIZE_T lrSlotOffset = -1; - SSIZE_T pacSpOffset = 0; - bool hasPacSignLR = false; - constexpr SSIZE_T PtrSize = 8; - - // ARM64 prolog unwind codes are stored in reverse prolog order. 
Replay them in prolog order so - // PACIASP/PACIBSP captures the SP that was live when LR was originally signed. - while (unwindOpIndex != 0) - { - UnwindCodePtr = dac_cast(unwindOpStarts[--unwindOpIndex]); - BYTE CurCode = *UnwindCodePtr; - - if (((CurCode & 0xFC) == 0xC8) || // save_regp - ((CurCode & 0xFE) == 0xD8) || // save_fregp - ((CurCode & 0xFE) == 0xDC) || // save_freg - CurCode == 0xE1 || // set_fp - CurCode == 0xE2 || // add_fp - CurCode == 0xE3 || // nop - CurCode == 0xE5 || // end_c - CurCode == 0xE6) // save_next - { - continue; - } - - if ((CurCode & 0xE0) == 0x00) // alloc_s - { - currentSpOffset -= (CurCode & 0x1F) * 16; - continue; - } - - if ((CurCode & 0xE0) == 0x20) // save_r19r20_x - { - currentSpOffset -= (CurCode & 0x1F) * 8; - continue; - } - - if ((CurCode & 0xC0) == 0x40) // save_fplr - { - lrSlotOffset = currentSpOffset + ((CurCode & 0x3F) * 8) + PtrSize; - continue; - } - - if ((CurCode & 0xC0) == 0x80) // save_fplr_x - { - currentSpOffset -= ((CurCode & 0x3F) + 1) * 8; - lrSlotOffset = currentSpOffset + PtrSize; - continue; - } - - if ((CurCode & 0xF8) == 0xC0) // alloc_m - { - ULONG x = ((CurCode & 0x7) << 8) | *(UnwindCodePtr + 1); - currentSpOffset -= x * 16; - continue; - } - - if (((CurCode & 0xFC) == 0xCC) || // save_regp_x - ((CurCode & 0xFE) == 0xDA)) // save_fregp_x - { - ULONG z = *(UnwindCodePtr + 1) & 0x3F; - currentSpOffset -= (z + 1) * 8; - continue; - } - - if ((CurCode & 0xFC) == 0xD0) // save_reg - { - BYTE nextCode = *(UnwindCodePtr + 1); - ULONG x = ((CurCode & 0x3) << 2) | (nextCode >> 6); - ULONG z = nextCode & 0x3F; - if (x == 11) // R30 / LR is the 12th GP register in the save_reg encodings - { - lrSlotOffset = currentSpOffset + z * 8; - } - - continue; - } - - if ((CurCode & 0xFE) == 0xD4) // save_reg_x - { - BYTE nextCode = *(UnwindCodePtr + 1); - ULONG x = ((CurCode & 0x1) << 3) | (nextCode >> 5); - currentSpOffset -= ((nextCode & 0x1F) + 1) * 8; - if (x == 11) // R30 / LR is the 12th GP register in the 
save_reg encodings - { - lrSlotOffset = currentSpOffset; - } - - continue; - } - - if ((CurCode & 0xFE) == 0xD6) // save_lrpair - { - ULONG z = *(UnwindCodePtr + 1) & 0x3F; - lrSlotOffset = currentSpOffset + z * 8 + PtrSize; - continue; - } - - if (CurCode == 0xDE) // save_freg_x - { - ULONG z = *(UnwindCodePtr + 1) & 0x1F; - currentSpOffset -= (z + 1) * 8; - continue; - } - - if (CurCode == 0xE0) // alloc_l - { - ULONG x = (*(UnwindCodePtr + 1) << 16) | (*(UnwindCodePtr + 2) << 8) | *(UnwindCodePtr + 3); - currentSpOffset -= x * 16; - continue; - } - - if (CurCode == 0xFC) // pac_sign_lr - { - pacSpOffset = currentSpOffset; - hasPacSignLR = true; - continue; - } - - return false; - } - - if (!hasPacSignLR) - { - return true; - } - - if (lrSlotOffset == -1) - { - return false; - } - - *pSpForPacSign = (TADDR)((SSIZE_T)dac_cast(ppvRetAddrLocation) - (lrSlotOffset - pacSpOffset)); - return true; -} -#endif //TARGET_ARM64 - bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in PTR_PTR_VOID * ppvRetAddrLocation, // out @@ -1160,14 +941,20 @@ bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn #endif contextPointers.Lr = pRegisterSet->pLR; - RtlVirtualUnwind(NULL, + RtlVirtualUnwindWithSpForPacSign(NULL, dac_cast(m_moduleBase), pRegisterSet->IP, (PRUNTIME_FUNCTION)pNativeMethodInfo->runtimeFunction, &context, &HandlerData, &EstablisherFrame, - &contextPointers); + &contextPointers, + (PULONG64)pSpForArm64PacSign); + + if (context.Pc == 0) + { + return false; + } if (contextPointers.Lr == pRegisterSet->pLR) { @@ -1180,11 +967,6 @@ bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn } *ppvRetAddrLocation = (PTR_PTR_VOID)contextPointers.Lr; - if (!TryGetSpForPacSigning(pUnwindDataBlob, unwindDataBlobSize, *ppvRetAddrLocation, pSpForArm64PacSign)) - { - return false; - } - return true; #else *pSpForArm64PacSign = 0; From 9dfec154cfb07d5703f48626a8604fdaa33b2873 
Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Wed, 3 Jun 2026 15:36:45 +0100 Subject: [PATCH 57/65] Use auth instead of strip while returning from hijack in NativeAOT --- src/coreclr/nativeaot/Runtime/AsmOffsets.h | 5 +++++ src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h | 1 + .../nativeaot/Runtime/arm64/ExceptionHandling.S | 1 + .../nativeaot/Runtime/arm64/ExceptionHandling.asm | 1 + src/coreclr/nativeaot/Runtime/arm64/GcProbe.S | 12 +++++++++--- src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm | 12 ++++++++---- src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S | 14 ++++++++++++-- src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm | 13 +++++++++++-- src/coreclr/nativeaot/Runtime/thread.cpp | 9 +++++++++ src/coreclr/nativeaot/Runtime/thread.h | 3 +++ .../nativeaot/Runtime/unix/unixasmmacrosarm64.inc | 1 + 11 files changed, 61 insertions(+), 11 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/AsmOffsets.h b/src/coreclr/nativeaot/Runtime/AsmOffsets.h index 72f27daccdf033..c46652f53fc843 100644 --- a/src/coreclr/nativeaot/Runtime/AsmOffsets.h +++ b/src/coreclr/nativeaot/Runtime/AsmOffsets.h @@ -54,7 +54,12 @@ ASM_OFFSET( 30, 48, Thread, m_pTransitionFrame) ASM_OFFSET( 34, 50, Thread, m_pDeferredTransitionFrame) ASM_OFFSET( 40, 68, Thread, m_ppvHijackedReturnAddressLocation) ASM_OFFSET( 44, 70, Thread, m_pvHijackedReturnAddress) +#if defined(TARGET_ARM64) +ASM_OFFSET( 48, 78, Thread, m_pSpForPacSign) +ASM_OFFSET( 4c, 80, Thread, m_pExInfoStackHead) +#else ASM_OFFSET( 48, 78, Thread, m_pExInfoStackHead) +#endif #ifdef TARGET_X86 ASM_OFFSET( 4c, FF, Thread, m_uHijackedReturnValueFlags) #endif diff --git a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h index 4aec05ba2cb5bd..b07404007f84b2 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h +++ b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h @@ -224,6 +224,7 @@ TrashRegister32Bit SETS "w":CC:("$TrashRegister32Bit":RIGHT:((:LEN:TrashRegister str 
$trashReg1, [$trashReg2] str xzr, [$threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] str xzr, [$threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + str xzr, [$threadReg, #OFFSETOF__Thread__m_pSpForPacSign] 0 MEND diff --git a/src/coreclr/nativeaot/Runtime/arm64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/arm64/ExceptionHandling.S index aa8ac5e79069c3..b67466bc7e61cd 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/ExceptionHandling.S +++ b/src/coreclr/nativeaot/Runtime/arm64/ExceptionHandling.S @@ -331,6 +331,7 @@ LOCAL_LABEL(ClearThreadState): // clear the Thread's hijack state str xzr, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] str xzr, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + str xzr, [x2, #OFFSETOF__Thread__m_pSpForPacSign] LOCAL_LABEL(NotHijacked): diff --git a/src/coreclr/nativeaot/Runtime/arm64/ExceptionHandling.asm b/src/coreclr/nativeaot/Runtime/arm64/ExceptionHandling.asm index 6f87c632a51e1a..da61aa1cfd6e71 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/ExceptionHandling.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/ExceptionHandling.asm @@ -333,6 +333,7 @@ ClearThreadState ;; clear the Thread's hijack state str xzr, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] str xzr, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + str xzr, [x2, #OFFSETOF__Thread__m_pSpForPacSign] NotHijacked diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S index 144e2688adf61a..f69505c05e4c11 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S @@ -4,6 +4,8 @@ #include #include "AsmOffsets.inc" +.arch_extension pauth + #define PROBE_FRAME_SIZE 0x140 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, lr, m_pThread, m_Flags) + // 10 * 8 for callee saved registers + // 1 * 8 for caller SP + @@ -150,16 +152,20 @@ #endif // - // Fix the stack by restoring the original return address + // 
Fix the stack by restoring and authenticating the original return address. // ldr lr, [x9, #OFFSETOF__Thread__m_pvHijackedReturnAddress] - xpaclri + ldr x16, [x9, #OFFSETOF__Thread__m_pSpForPacSign] + cbz x16, 0f + autia lr, x16 +0: // // Clear hijack state // - // Clear m_ppvHijackedReturnAddressLocation and m_pvHijackedReturnAddress + // Clear m_ppvHijackedReturnAddressLocation, m_pvHijackedReturnAddress, and m_pSpForPacSign stp xzr, xzr, [x9, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str xzr, [x9, #OFFSETOF__Thread__m_pSpForPacSign] .endm // diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm index 12c5d76b2f0d94..86ba0a2a8129db 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm @@ -112,7 +112,7 @@ PROBE_FRAME_SIZE field 0 ;; ;; Register state on exit: ;; x9: thread pointer -;; x0-x7 preserved, x10 trashed +;; x0-x7 preserved, x10 and x16 trashed ;; MACRO FixupHijackedCallstack @@ -121,17 +121,21 @@ PROBE_FRAME_SIZE field 0 INLINE_GETTHREAD x9, x10 ;; - ;; Fix the stack by restoring the original return address + ;; Fix the stack by restoring and authenticating the original return address. 
;; ldr lr, [x9, #OFFSETOF__Thread__m_pvHijackedReturnAddress] - DCD 0xD50320FF ;; xpaclri instruction in binary to avoid error while compiling with non-PAC enabled compilers + ldr x16, [x9, #OFFSETOF__Thread__m_pSpForPacSign] + cbz x16, ClearHijackState + DCD 0xDAC1161E ;; autib lr, x16 instruction in binary to avoid requiring PAC-enabled assemblers +ClearHijackState ;; ;; Clear hijack state ;; ASSERT OFFSETOF__Thread__m_pvHijackedReturnAddress == (OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + 8) - ;; Clear m_ppvHijackedReturnAddressLocation and m_pvHijackedReturnAddress + ;; Clear m_ppvHijackedReturnAddressLocation, m_pvHijackedReturnAddress, and m_pSpForPacSign stp xzr, xzr, [x9, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str xzr, [x9, #OFFSETOF__Thread__m_pSpForPacSign] MEND MACRO diff --git a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S index b68c5589e3431f..21c44d8fb38d15 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S +++ b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S @@ -14,8 +14,7 @@ LEAF_END PacStripPtr, _TEXT // void* PacSignPtr(void *, void *); -// This function signs the input pointer using x1 as salt. -// Thus we need to move input in lr, sign it and then copy it back to the result register. +// This function signs the input pointer using x1 as salt. It is a no-op on non-PAC enabled machines. .arch_extension pauth LEAF_ENTRY PacSignPtr, _TEXT mov x17, x0 @@ -24,3 +23,14 @@ mov x0, x17 ret LEAF_END PacSignPtr, _TEXT + +// void* PacAuthPtr(void *, void *); +// This function authenticates the input signed-pointer using x1 as salt. It is a no-op on non-PAC enabled machines. 
+.arch_extension pauth + LEAF_ENTRY PacAuthPtr, _TEXT + mov x17, x0 + mov x16, x1 + autia1716 + mov x0, x17 + ret + LEAF_END PacAuthPtr, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm index 1695a494fc8103..d31511558da154 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm @@ -14,8 +14,7 @@ LEAF_END PacStripPtr ; void* PacSignPtr(void *, void *); -; This function signs the input pointer using x1 as salt. -; Thus we need to move input in lr, sign it and then copy it back to the result register. +; This function signs the input pointer using x1 as salt. It is a no-op on non-PAC enabled machines. LEAF_ENTRY PacSignPtr mov x17, x0 mov x16, x1 @@ -24,4 +23,14 @@ ret LEAF_END PacSignPtr +; void* PacAuthPtr(void *, void *); +; This function authenticates the input signed-pointer using x1 as salt. It is a no-op on non-PAC enabled machines. + LEAF_ENTRY PacAuthPtr + mov x17, x0 + mov x16, x1 + DCD 0xD50321DF ; autib1716 instruction in binary to avoid error while compiling with non-PAC enabled compilers + mov x0, x17 + ret + LEAF_END PacAuthPtr + end diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index 430983f4075e4a..158c2698eb14c9 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -836,6 +836,9 @@ void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, Hijack m_ppvHijackedReturnAddressLocation = ppvRetAddrLocation; m_pvHijackedReturnAddress = pvRetAddr; +#if defined(TARGET_ARM64) + m_pSpForPacSign = (void*)spForPacSign; +#endif // TARGET_ARM64 #if defined(TARGET_X86) bool isAsync = false; GCRefKind retKind = frameIterator->GetCodeManager()->GetReturnValueKind(frameIterator->GetMethodInfo(), frameIterator->GetRegisterSet(), &isAsync); @@ -972,6 +975,9 @@ void Thread::UnhijackWorker() if (m_pvHijackedReturnAddress == 
NULL) { ASSERT(m_ppvHijackedReturnAddressLocation == NULL); +#if defined(TARGET_ARM64) + ASSERT(m_pSpForPacSign == NULL); +#endif // TARGET_ARM64 return; } @@ -982,6 +988,9 @@ void Thread::UnhijackWorker() // Clear the hijack state. m_ppvHijackedReturnAddressLocation = NULL; m_pvHijackedReturnAddress = NULL; +#if defined(TARGET_ARM64) + m_pSpForPacSign = NULL; +#endif // TARGET_ARM64 #ifdef TARGET_X86 m_uHijackedReturnValueFlags = 0; #endif diff --git a/src/coreclr/nativeaot/Runtime/thread.h b/src/coreclr/nativeaot/Runtime/thread.h index a910894ec968df..e99b617a1ddb75 100644 --- a/src/coreclr/nativeaot/Runtime/thread.h +++ b/src/coreclr/nativeaot/Runtime/thread.h @@ -141,6 +141,9 @@ struct RuntimeThreadLocals #ifdef FEATURE_HIJACK void ** m_ppvHijackedReturnAddressLocation; void * m_pvHijackedReturnAddress; +#if defined(TARGET_ARM64) + void * m_pSpForPacSign; +#endif #endif // FEATURE_HIJACK PTR_ExInfo m_pExInfoStackHead; #ifdef TARGET_X86 diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc index 3027a17d6c8fc1..635f71b08b6e61 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc @@ -323,6 +323,7 @@ C_FUNC(\Name): str \trashReg1, [\trashReg2] str xzr, [\threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] str xzr, [\threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + str xzr, [\threadReg, #OFFSETOF__Thread__m_pSpForPacSign] 0: .endm From 0f7379b7c139c285ac1fe58420f230b0de226fdb Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Wed, 3 Jun 2026 16:17:07 +0100 Subject: [PATCH 58/65] Fix build errors on Windows --- src/coreclr/unwinder/arm64/unwinder.cpp | 8 ++++++++ .../Contracts/StackWalk/Context/ARM64/ARM64Unwinder.cs | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/src/coreclr/unwinder/arm64/unwinder.cpp b/src/coreclr/unwinder/arm64/unwinder.cpp index 
15d9e46bcbeea5..3f8555b7f8f9a4 100644 --- a/src/coreclr/unwinder/arm64/unwinder.cpp +++ b/src/coreclr/unwinder/arm64/unwinder.cpp @@ -3,11 +3,17 @@ // +#if defined(NATIVEAOT) +#include "common.h" +#include +#include "daccess.h" +#else #include "stdafx.h" #include "utilcode.h" #include "crosscomp.h" #include "unwinder.h" +#endif #define NOTHING @@ -2786,6 +2792,7 @@ Return Value: #endif // !defined(DEBUGGER_UNWIND) +#if !defined(NATIVEAOT) BOOL OOPStackUnwinderArm64::Unwind(T_CONTEXT * pContext) { DWORD64 ImageBase = 0; @@ -2853,6 +2860,7 @@ BOOL DacUnwindStackFrame(T_CONTEXT *pContext, T_KNONVOLATILE_CONTEXT_POINTERS* p return res; } +#endif // !defined(NATIVEAOT) #if defined(HOST_UNIX) diff --git a/src/native/managed/cdac/Microsoft.Diagnostics.DataContractReader.Contracts/Contracts/StackWalk/Context/ARM64/ARM64Unwinder.cs b/src/native/managed/cdac/Microsoft.Diagnostics.DataContractReader.Contracts/Contracts/StackWalk/Context/ARM64/ARM64Unwinder.cs index a2b59cd4a2c0af..4f93aa50f20439 100644 --- a/src/native/managed/cdac/Microsoft.Diagnostics.DataContractReader.Contracts/Contracts/StackWalk/Context/ARM64/ARM64Unwinder.cs +++ b/src/native/managed/cdac/Microsoft.Diagnostics.DataContractReader.Contracts/Contracts/StackWalk/Context/ARM64/ARM64Unwinder.cs @@ -764,6 +764,10 @@ private bool VirtualUnwindFull( } context.Lr &= 0x0000FFFFFFFFFFFF; + + // + // TODO: Implement support for UnwindFlags RTL_VIRTUAL_UNWIND2_VALIDATE_PAC. 
+ // } // From 528710b49b8bd7accc9bb20d9e6dd51fcffa41d3 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 4 Jun 2026 16:03:52 +0100 Subject: [PATCH 59/65] Fix build errors on Windows --- src/coreclr/inc/daccess.h | 10 +++++----- src/coreclr/nativeaot/Runtime/CMakeLists.txt | 1 + src/coreclr/nativeaot/Runtime/arm64/GcProbe.S | 4 ++-- src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm | 10 +++++----- src/coreclr/unwinder/arm64/unwinder.cpp | 7 +++---- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/coreclr/inc/daccess.h b/src/coreclr/inc/daccess.h index b3f97772c24ef3..e78f36776e5fef 100644 --- a/src/coreclr/inc/daccess.h +++ b/src/coreclr/inc/daccess.h @@ -2369,9 +2369,13 @@ typedef DPTR(uint32_t) PTR_uint32_t; typedef DPTR(uint64_t) PTR_uint64_t; typedef DPTR(uintptr_t) PTR_uintptr_t; typedef DPTR(TADDR) PTR_TADDR; +typedef ArrayDPTR(BYTE) PTR_BYTE; +typedef DPTR(WORD) PTR_WORD; +typedef DPTR(DWORD) PTR_DWORD; +typedef DPTR(ULONG64) PTR_ULONG64; +typedef DPTR(UINT64) PTR_UINT64; #ifndef NATIVEAOT -typedef ArrayDPTR(BYTE) PTR_BYTE; typedef DPTR(PTR_BYTE) PTR_PTR_BYTE; typedef DPTR(PTR_PTR_BYTE) PTR_PTR_PTR_BYTE; typedef ArrayDPTR(signed char) PTR_SBYTE; @@ -2379,16 +2383,12 @@ typedef ArrayDPTR(const BYTE) PTR_CBYTE; typedef DPTR(INT8) PTR_INT8; typedef DPTR(INT16) PTR_INT16; typedef DPTR(UINT16) PTR_UINT16; -typedef DPTR(WORD) PTR_WORD; typedef DPTR(USHORT) PTR_USHORT; -typedef DPTR(DWORD) PTR_DWORD; typedef DPTR(LONG) PTR_LONG; typedef DPTR(ULONG) PTR_ULONG; typedef DPTR(INT32) PTR_INT32; typedef DPTR(UINT32) PTR_UINT32; -typedef DPTR(ULONG64) PTR_ULONG64; typedef DPTR(INT64) PTR_INT64; -typedef DPTR(UINT64) PTR_UINT64; typedef DPTR(SIZE_T) PTR_SIZE_T; typedef DPTR(int) PTR_int; typedef DPTR(BOOL) PTR_BOOL; diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt index e59b60c932c738..a50f7c40982765 100644 --- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt +++ 
b/src/coreclr/nativeaot/Runtime/CMakeLists.txt @@ -136,6 +136,7 @@ if (WIN32) list(APPEND FULL_RUNTIME_SOURCES ../../unwinder/arm64/unwinder.cpp ) + include_directories(../../inc) include_directories(../../unwinder) include_directories(../../unwinder/arm64) endif() diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S index f69505c05e4c11..544153f75f4628 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S @@ -156,10 +156,10 @@ // ldr lr, [x9, #OFFSETOF__Thread__m_pvHijackedReturnAddress] ldr x16, [x9, #OFFSETOF__Thread__m_pSpForPacSign] - cbz x16, 0f + cbz x16, clearHijackState autia lr, x16 -0: +clearHijackState: // // Clear hijack state // diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm index 86ba0a2a8129db..1683ad70cb5f14 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm @@ -115,7 +115,7 @@ PROBE_FRAME_SIZE field 0 ;; x0-x7 preserved, x10 and x16 trashed ;; MACRO - FixupHijackedCallstack + FixupHijackedCallstack $ClearHijackStateLabel ;; x9 <- GetThread(), TRASHES x10 INLINE_GETTHREAD x9, x10 @@ -125,10 +125,10 @@ PROBE_FRAME_SIZE field 0 ;; ldr lr, [x9, #OFFSETOF__Thread__m_pvHijackedReturnAddress] ldr x16, [x9, #OFFSETOF__Thread__m_pSpForPacSign] - cbz x16, ClearHijackState + cbz x16, $ClearHijackStateLabel DCD 0xDAC1161E ;; autib lr, x16 instruction in binary to avoid requiring PAC-enabled assemblers -ClearHijackState +$ClearHijackStateLabel ;; ;; Clear hijack state ;; @@ -159,7 +159,7 @@ ClearHijackState HijackTargetFakeProlog LABELED_RETURN_ADDRESS RhpGcProbeHijack - FixupHijackedCallstack + FixupHijackedCallstack RhpGcProbeClearHijackState ldr x10, =RhpTrapThreads ldr w10, [x10] @@ -206,7 +206,7 @@ WaitForGC ;; ;; LEAF_ENTRY RhpGcStressHijack - FixupHijackedCallstack + FixupHijackedCallstack RhpGcStressClearHijackState 
mov x12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_X0 + PTFF_SAVE_X1 + PTFF_SAVE_X2 + PTFF_SAVE_X3 + PTFF_SAVE_X4 + PTFF_SAVE_X5 + PTFF_SAVE_X6 + PTFF_SAVE_X7) b RhpGcStressProbe LEAF_END RhpGcStressHijack diff --git a/src/coreclr/unwinder/arm64/unwinder.cpp b/src/coreclr/unwinder/arm64/unwinder.cpp index 3f8555b7f8f9a4..53948fd72d1c8e 100644 --- a/src/coreclr/unwinder/arm64/unwinder.cpp +++ b/src/coreclr/unwinder/arm64/unwinder.cpp @@ -5,15 +5,14 @@ #if defined(NATIVEAOT) #include "common.h" -#include -#include "daccess.h" #else #include "stdafx.h" #include "utilcode.h" -#include "crosscomp.h" +#endif +#include "crosscomp.h" +#include "clrnt.h" #include "unwinder.h" -#endif #define NOTHING From 782e0d87887a96ba487b7e3d20edc779bb93530e Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 4 Jun 2026 16:32:10 +0100 Subject: [PATCH 60/65] Try fixing arm32 and x64 build failures --- src/coreclr/inc/daccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/inc/daccess.h b/src/coreclr/inc/daccess.h index e78f36776e5fef..6b5a584ffbdecc 100644 --- a/src/coreclr/inc/daccess.h +++ b/src/coreclr/inc/daccess.h @@ -2372,8 +2372,8 @@ typedef DPTR(TADDR) PTR_TADDR; typedef ArrayDPTR(BYTE) PTR_BYTE; typedef DPTR(WORD) PTR_WORD; typedef DPTR(DWORD) PTR_DWORD; -typedef DPTR(ULONG64) PTR_ULONG64; -typedef DPTR(UINT64) PTR_UINT64; +typedef DPTR(uint64_t) PTR_ULONG64; +typedef DPTR(uint64_t) PTR_UINT64; #ifndef NATIVEAOT typedef DPTR(PTR_BYTE) PTR_PTR_BYTE; From 3efd4141501f0cf4940a8aacd51e63dea41328cf Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 4 Jun 2026 17:18:56 +0100 Subject: [PATCH 61/65] Try fixing build errors --- src/coreclr/inc/daccess.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/inc/daccess.h b/src/coreclr/inc/daccess.h index 6b5a584ffbdecc..e36289dd18e178 100644 --- a/src/coreclr/inc/daccess.h +++ b/src/coreclr/inc/daccess.h @@ -2369,9 +2369,9 @@ typedef DPTR(uint32_t) PTR_uint32_t; 
typedef DPTR(uint64_t) PTR_uint64_t; typedef DPTR(uintptr_t) PTR_uintptr_t; typedef DPTR(TADDR) PTR_TADDR; -typedef ArrayDPTR(BYTE) PTR_BYTE; -typedef DPTR(WORD) PTR_WORD; -typedef DPTR(DWORD) PTR_DWORD; +typedef ArrayDPTR(uint8_t) PTR_BYTE; +typedef DPTR(uint16_t) PTR_WORD; +typedef DPTR(uint32_t) PTR_DWORD; typedef DPTR(uint64_t) PTR_ULONG64; typedef DPTR(uint64_t) PTR_UINT64; From 98cbb045d7327d5408e9a6f2304b3bc0f838daa6 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 4 Jun 2026 18:16:13 +0100 Subject: [PATCH 62/65] More build fixes --- src/coreclr/inc/daccess.h | 10 ++++++++-- src/coreclr/nativeaot/Runtime/arm64/GcProbe.S | 8 ++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/coreclr/inc/daccess.h b/src/coreclr/inc/daccess.h index e36289dd18e178..b0239d6a9092bf 100644 --- a/src/coreclr/inc/daccess.h +++ b/src/coreclr/inc/daccess.h @@ -2369,13 +2369,19 @@ typedef DPTR(uint32_t) PTR_uint32_t; typedef DPTR(uint64_t) PTR_uint64_t; typedef DPTR(uintptr_t) PTR_uintptr_t; typedef DPTR(TADDR) PTR_TADDR; + +#ifdef NATIVEAOT typedef ArrayDPTR(uint8_t) PTR_BYTE; typedef DPTR(uint16_t) PTR_WORD; typedef DPTR(uint32_t) PTR_DWORD; typedef DPTR(uint64_t) PTR_ULONG64; typedef DPTR(uint64_t) PTR_UINT64; - -#ifndef NATIVEAOT +#else +typedef ArrayDPTR(BYTE) PTR_BYTE; +typedef DPTR(WORD) PTR_WORD; +typedef DPTR(DWORD) PTR_DWORD; +typedef DPTR(ULONG64) PTR_ULONG64; +typedef DPTR(UINT64) PTR_UINT64; typedef DPTR(PTR_BYTE) PTR_PTR_BYTE; typedef DPTR(PTR_PTR_BYTE) PTR_PTR_PTR_BYTE; typedef ArrayDPTR(signed char) PTR_SBYTE; diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S index 544153f75f4628..66dcdfe6352956 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S @@ -105,7 +105,7 @@ // x9: thread pointer // x0-x7, q0-q7 preserved, x10 trashed // -.macro FixupHijackedCallstack +.macro FixupHijackedCallstack clearHijackState // x9 <- 
GetThread() @@ -156,10 +156,10 @@ // ldr lr, [x9, #OFFSETOF__Thread__m_pvHijackedReturnAddress] ldr x16, [x9, #OFFSETOF__Thread__m_pSpForPacSign] - cbz x16, clearHijackState + cbz x16, \clearHijackState autia lr, x16 -clearHijackState: +\clearHijackState: // // Clear hijack state // @@ -172,7 +172,7 @@ clearHijackState: // GC Probe Hijack target // NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler - FixupHijackedCallstack + FixupHijackedCallstack LOCAL_LABEL(RhpGcProbeClearHijackState) PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, 10 tbnz x10, #TrapThreadsFlags_TrapThreads_Bit, LOCAL_LABEL(WaitForGC) From e41c3cafcefe32885fb7dbeb917c62a9039b7303 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 4 Jun 2026 18:50:59 +0100 Subject: [PATCH 63/65] Limit changes to NativeAOT declarations to Arm64 only --- src/coreclr/inc/daccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/inc/daccess.h b/src/coreclr/inc/daccess.h index b0239d6a9092bf..820d9f9217dfb8 100644 --- a/src/coreclr/inc/daccess.h +++ b/src/coreclr/inc/daccess.h @@ -2370,13 +2370,13 @@ typedef DPTR(uint64_t) PTR_uint64_t; typedef DPTR(uintptr_t) PTR_uintptr_t; typedef DPTR(TADDR) PTR_TADDR; -#ifdef NATIVEAOT +#if defined(NATIVEAOT) && defined(TARGET_ARM64) typedef ArrayDPTR(uint8_t) PTR_BYTE; typedef DPTR(uint16_t) PTR_WORD; typedef DPTR(uint32_t) PTR_DWORD; typedef DPTR(uint64_t) PTR_ULONG64; typedef DPTR(uint64_t) PTR_UINT64; -#else +#elif !defined(NATIVEAOT) typedef ArrayDPTR(BYTE) PTR_BYTE; typedef DPTR(WORD) PTR_WORD; typedef DPTR(DWORD) PTR_DWORD; From 0910b0adc7cd7b55d903770a6d8cb699c0619834 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Fri, 5 Jun 2026 11:34:25 +0100 Subject: [PATCH 64/65] Avoid separate definitions for NativeAOT in daccess.h --- src/coreclr/inc/clrnt.h | 6 +++--- src/coreclr/inc/daccess.h | 8 +------- src/coreclr/unwinder/arm64/unwinder.cpp | 8 ++++---- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git
a/src/coreclr/inc/clrnt.h b/src/coreclr/inc/clrnt.h index 0e3de58d2c8e04..f0c72b682b465e 100644 --- a/src/coreclr/inc/clrnt.h +++ b/src/coreclr/inc/clrnt.h @@ -333,7 +333,7 @@ RtlpGetFunctionEndAddress ( else { // Get from the xdata record. - FunctionLength = *(PTR_ULONG64)(ImageBase + FunctionLength) & 0x3ffff; + FunctionLength = *(PTR_uint64_t)(ImageBase + FunctionLength) & 0x3ffff; } return FunctionEntry->BeginAddress + 4 * FunctionLength; @@ -422,7 +422,7 @@ RtlpGetFunctionEndAddress ( if ((FunctionLength & 3) != 0) { FunctionLength = (FunctionLength >> 2) & 0x7ff; } else { - FunctionLength = *(PTR_ULONG64)(ImageBase + FunctionLength) & 0x3ffff; + FunctionLength = *(PTR_uint64_t)(ImageBase + FunctionLength) & 0x3ffff; } return FunctionEntry->BeginAddress + 4 * FunctionLength; @@ -478,7 +478,7 @@ RtlpGetFunctionEndAddress ( if ((FunctionLength & 3) != 0) { FunctionLength = (FunctionLength >> 2) & 0x7ff; } else { - FunctionLength = *(PTR_ULONG64)(ImageBase + FunctionLength) & 0x3ffff; + FunctionLength = *(PTR_uint64_t)(ImageBase + FunctionLength) & 0x3ffff; } return FunctionEntry->BeginAddress + 2 * FunctionLength; diff --git a/src/coreclr/inc/daccess.h b/src/coreclr/inc/daccess.h index 820d9f9217dfb8..883843de0ba97c 100644 --- a/src/coreclr/inc/daccess.h +++ b/src/coreclr/inc/daccess.h @@ -2370,13 +2370,7 @@ typedef DPTR(uint64_t) PTR_uint64_t; typedef DPTR(uintptr_t) PTR_uintptr_t; typedef DPTR(TADDR) PTR_TADDR; -#if defined(NATIVEAOT) && defined(TARGET_ARM64) -typedef ArrayDPTR(uint8_t) PTR_BYTE; -typedef DPTR(uint16_t) PTR_WORD; -typedef DPTR(uint32_t) PTR_DWORD; -typedef DPTR(uint64_t) PTR_ULONG64; -typedef DPTR(uint64_t) PTR_UINT64; -#elif !defined(NATIVEAOT) +#ifndef NATIVEAOT typedef ArrayDPTR(BYTE) PTR_BYTE; typedef DPTR(WORD) PTR_WORD; typedef DPTR(DWORD) PTR_DWORD; diff --git a/src/coreclr/unwinder/arm64/unwinder.cpp b/src/coreclr/unwinder/arm64/unwinder.cpp index 53948fd72d1c8e..ff929232d9d401 100644 --- a/src/coreclr/unwinder/arm64/unwinder.cpp +++ 
b/src/coreclr/unwinder/arm64/unwinder.cpp @@ -175,10 +175,10 @@ typedef struct _ARM64_VFP_STATE #if !defined(DEBUGGER_UNWIND) -#define MEMORY_READ_BYTE(params, addr) (*dac_cast(addr)) -#define MEMORY_READ_WORD(params, addr) (*dac_cast(addr)) -#define MEMORY_READ_DWORD(params, addr) (*dac_cast(addr)) -#define MEMORY_READ_QWORD(params, addr) (*dac_cast(addr)) +#define MEMORY_READ_BYTE(params, addr) (*dac_cast(addr)) +#define MEMORY_READ_WORD(params, addr) (*dac_cast(addr)) +#define MEMORY_READ_DWORD(params, addr) (*dac_cast(addr)) +#define MEMORY_READ_QWORD(params, addr) (*dac_cast(addr)) #endif From 0e41b35950481913b3d47313f3c6e4eefce97409 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Fri, 5 Jun 2026 11:47:58 +0100 Subject: [PATCH 65/65] Avoid adding crosscomp.h for NativeAOT builds to avoid redef warnings for macros like CONTEXT_UNWOUND_TO_CALL --- src/coreclr/unwinder/arm64/unwinder.cpp | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/coreclr/unwinder/arm64/unwinder.cpp b/src/coreclr/unwinder/arm64/unwinder.cpp index ff929232d9d401..b49f2c3039c7a4 100644 --- a/src/coreclr/unwinder/arm64/unwinder.cpp +++ b/src/coreclr/unwinder/arm64/unwinder.cpp @@ -5,12 +5,33 @@ #if defined(NATIVEAOT) #include "common.h" +#include +#include "rhassert.h" + +#ifndef T_CONTEXT +#define T_CONTEXT CONTEXT +#endif +#ifndef PT_CONTEXT +#define PT_CONTEXT PCONTEXT +#endif +#ifndef T_KNONVOLATILE_CONTEXT_POINTERS +#define T_KNONVOLATILE_CONTEXT_POINTERS KNONVOLATILE_CONTEXT_POINTERS +#endif +#ifndef PT_KNONVOLATILE_CONTEXT_POINTERS +#define PT_KNONVOLATILE_CONTEXT_POINTERS PKNONVOLATILE_CONTEXT_POINTERS +#endif +#ifndef T_RUNTIME_FUNCTION +#define T_RUNTIME_FUNCTION RUNTIME_FUNCTION +#endif +#ifndef PT_RUNTIME_FUNCTION +#define PT_RUNTIME_FUNCTION PRUNTIME_FUNCTION +#endif #else #include "stdafx.h" #include "utilcode.h" +#include "crosscomp.h" #endif -#include "crosscomp.h" #include "clrnt.h" #include "unwinder.h"