diff --git a/src/coreclr/inc/CrstTypes.def b/src/coreclr/inc/CrstTypes.def index 5a73a52eb37a65..202c27a4ce4d03 100644 --- a/src/coreclr/inc/CrstTypes.def +++ b/src/coreclr/inc/CrstTypes.def @@ -517,6 +517,10 @@ End Crst PerfMap AcquiredAfter CodeVersioning AssemblyList + AcquiredBefore PerfMapDeferredActions +End + +Crst PerfMapDeferredActions End Crst InterfaceDispatchGlobalLists diff --git a/src/coreclr/inc/crsttypes_generated.h b/src/coreclr/inc/crsttypes_generated.h index d6720d9ee42ac3..380a2942fe3a92 100644 --- a/src/coreclr/inc/crsttypes_generated.h +++ b/src/coreclr/inc/crsttypes_generated.h @@ -85,41 +85,42 @@ enum CrstType CrstPEImage = 67, CrstPendingTypeLoadEntry = 68, CrstPerfMap = 69, - CrstPgoData = 70, - CrstPinnedByrefValidation = 71, - CrstPinnedHeapHandleTable = 72, - CrstPregeneratedStringThunks = 73, - CrstProfilerGCRefDataFreeList = 74, - CrstProfilingAPIStatus = 75, - CrstRCWCache = 76, - CrstRCWCleanupList = 77, - CrstReadyToRunEntryPointToMethodDescMap = 78, - CrstReflection = 79, - CrstReJITGlobalRequest = 80, - CrstSigConvert = 81, - CrstSingleUseLock = 82, - CrstStressLog = 83, - CrstStubCache = 84, - CrstStubDispatchCache = 85, - CrstSyncBlockCache = 86, - CrstSyncHashLock = 87, - CrstSystemDomain = 88, - CrstSystemDomainDelayedUnloadList = 89, - CrstThreadIdDispenser = 90, - CrstThreadLocalStorageLock = 91, - CrstThreadStore = 92, - CrstTieredCompilation = 93, - CrstTypeEquivalenceMap = 94, - CrstTypeIDMap = 95, - CrstUMEntryThunkCache = 96, - CrstUMEntryThunkFreeListLock = 97, - CrstUniqueStack = 98, - CrstUnresolvedClassLock = 99, - CrstUnwindInfoTablePendingLock = 100, - CrstUnwindInfoTablePublishLock = 101, - CrstVSDIndirectionCellLock = 102, - CrstWrapperTemplate = 103, - kNumberOfCrstTypes = 104 + CrstPerfMapDeferredActions = 70, + CrstPgoData = 71, + CrstPinnedByrefValidation = 72, + CrstPinnedHeapHandleTable = 73, + CrstPregeneratedStringThunks = 74, + CrstProfilerGCRefDataFreeList = 75, + CrstProfilingAPIStatus = 76, + CrstRCWCache = 77, + CrstRCWCleanupList = 78, + CrstReadyToRunEntryPointToMethodDescMap = 79, + CrstReflection = 80, + CrstReJITGlobalRequest = 81, + CrstSigConvert = 82, + CrstSingleUseLock = 83, + CrstStressLog = 84, + CrstStubCache = 85, + CrstStubDispatchCache = 86, + CrstSyncBlockCache = 87, + CrstSyncHashLock = 88, + CrstSystemDomain = 89, + CrstSystemDomainDelayedUnloadList = 90, + CrstThreadIdDispenser = 91, + CrstThreadLocalStorageLock = 92, + CrstThreadStore = 93, + CrstTieredCompilation = 94, + CrstTypeEquivalenceMap = 95, + CrstTypeIDMap = 96, + CrstUMEntryThunkCache = 97, + CrstUMEntryThunkFreeListLock = 98, + CrstUniqueStack = 99, + CrstUnresolvedClassLock = 100, + CrstUnwindInfoTablePendingLock = 101, + CrstUnwindInfoTablePublishLock = 102, + CrstVSDIndirectionCellLock = 103, + CrstWrapperTemplate = 104, + kNumberOfCrstTypes = 105 }; #endif // __CRST_TYPES_INCLUDED @@ -131,7 +132,7 @@ enum CrstType int g_rgCrstLevelMap[] = { 9, // CrstAppDomainCache - 2, // CrstAssemblyList + 3, // CrstAssemblyList 13, // CrstAssemblyLoader 2, // CrstAsyncContinuations 3, // CrstAvailableClass @@ -199,7 +200,8 @@ int g_rgCrstLevelMap[] = 0, // CrstNotifyGdb 4, // CrstPEImage 20, // CrstPendingTypeLoadEntry - 0, // CrstPerfMap + 2, // CrstPerfMap + 0, // CrstPerfMapDeferredActions 3, // CrstPgoData 0, // CrstPinnedByrefValidation 15, // CrstPinnedHeapHandleTable @@ -309,6 +311,7 @@ LPCSTR g_rgCrstNameMap[] = "CrstPEImage", "CrstPendingTypeLoadEntry", "CrstPerfMap", + "CrstPerfMapDeferredActions", "CrstPgoData", "CrstPinnedByrefValidation", "CrstPinnedHeapHandleTable", diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index e3a779a3d3704b..aed7e5fa48bcd8 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -371,6 +371,18 @@ PALAPI // Log a method to the jitdump file. PAL_PerfJitDump_LogMethod(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo, bool reportCodeBlock); +PALIMPORT +int +PALAPI +// Log a method to the jitdump file with a pre-captured timestamp and code buffer. +PAL_PerfJitDump_LogMethodWithTimestamp(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo, uint64_t timestamp, void* codeBuffer, size_t codeBufferSize); + +PALIMPORT +uint64_t +PALAPI +// Get the current timestamp in the same format used by jitdump records. +PAL_PerfJitDump_GetTimeStamp(); + PALIMPORT int PALAPI diff --git a/src/coreclr/pal/src/misc/perfjitdump.cpp b/src/coreclr/pal/src/misc/perfjitdump.cpp index 3b45b8c8856d62..d89ab4de05fc74 100644 --- a/src/coreclr/pal/src/misc/perfjitdump.cpp +++ b/src/coreclr/pal/src/misc/perfjitdump.cpp @@ -330,6 +330,86 @@ struct PerfJitDumpState return 0; } + int LogMethodWithTimestamp(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo, uint64_t timestamp, void* codeBuffer, size_t codeBufferSize) + { + int result = 0; + + if (enabled) + { + size_t symbolLen = strlen(symbol); + + JitCodeLoadRecord record; + + size_t bytesRemaining = sizeof(JitCodeLoadRecord) + symbolLen + 1 + codeBufferSize; + + record.header.timestamp = timestamp; + record.vma = (uint64_t) pCode; + record.code_addr = (uint64_t) pCode; + record.code_size = codeSize; + record.header.total_size = bytesRemaining; + + iovec items[] = { + { &record, sizeof(JitCodeLoadRecord) }, + { (void *)symbol, symbolLen + 1 }, + { codeBuffer, codeBufferSize }, + }; + size_t itemsCount = sizeof(items) / sizeof(items[0]); + + size_t itemsWritten = 0; + + if (!enabled) + goto exit2; + + // Increment codeIndex while locked + record.code_index = ++codeIndex; + + do + { + result = writev(fd, items + itemsWritten, itemsCount - itemsWritten); + + if ((size_t)result == bytesRemaining) + break; + + if (result == -1) + { + if (errno == EINTR) + continue; + + return FatalError(); + } + + // Detect unexpected failure cases. + _ASSERTE(bytesRemaining > (size_t)result); + _ASSERTE(result > 0); + + // Handle partial write case + + bytesRemaining -= result; + + do + { + if ((size_t)result < items[itemsWritten].iov_len) + { + items[itemsWritten].iov_len -= result; + items[itemsWritten].iov_base = (void*)((size_t) items[itemsWritten].iov_base + result); + break; + } + else + { + result -= items[itemsWritten].iov_len; + itemsWritten++; + + // Detect unexpected failure case. + _ASSERTE(itemsWritten < itemsCount); + } + } while (result > 0); + } while (true); + + } +exit2: + return 0; + } + int Finish() { int result = 0; @@ -394,6 +474,20 @@ PAL_PerfJitDump_LogMethod(void* pCode, size_t codeSize, const char* symbol, void return GetState().LogMethod(pCode, codeSize, symbol, debugInfo, unwindInfo, reportCodeBlock); } +int +PALAPI +PAL_PerfJitDump_LogMethodWithTimestamp(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo, uint64_t timestamp, void* codeBuffer, size_t codeBufferSize) +{ + return GetState().LogMethodWithTimestamp(pCode, codeSize, symbol, debugInfo, unwindInfo, timestamp, codeBuffer, codeBufferSize); +} + +uint64_t +PALAPI +PAL_PerfJitDump_GetTimeStamp() +{ + return GetTimeStampNS(); +} + int PALAPI PAL_PerfJitDump_Finish() @@ -424,6 +518,20 @@ PAL_PerfJitDump_LogMethod(void* pCode, size_t codeSize, const char* symbol, void return 0; } +int +PALAPI +PAL_PerfJitDump_LogMethodWithTimestamp(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo, uint64_t timestamp, void* codeBuffer, size_t codeBufferSize) +{ + return 0; +} + +uint64_t +PALAPI +PAL_PerfJitDump_GetTimeStamp() +{ + return 0; +} + int PALAPI PAL_PerfJitDump_Finish() diff --git a/src/coreclr/vm/eventing/eventpipe/ds-rt-coreclr.h b/src/coreclr/vm/eventing/eventpipe/ds-rt-coreclr.h index c2d4ab066c76a9..dfda4136681681 100644 --- a/src/coreclr/vm/eventing/eventpipe/ds-rt-coreclr.h +++ b/src/coreclr/vm/eventing/eventpipe/ds-rt-coreclr.h @@ -233,7 +233,7 @@ static uint32_t ds_rt_enable_perfmap (uint32_t type) { - LIMITED_METHOD_CONTRACT; + STANDARD_VM_CONTRACT; #ifdef FEATURE_PERFMAP PerfMap::PerfMapType perfMapType = (PerfMap::PerfMapType)type; diff --git a/src/coreclr/vm/finalizerthread.cpp b/src/coreclr/vm/finalizerthread.cpp index 0e36352feb1e5e..fcfd16e778b58d 100644 --- a/src/coreclr/vm/finalizerthread.cpp +++ b/src/coreclr/vm/finalizerthread.cpp @@ -11,6 +11,9 @@ #include "eventpipeadapter.h" #include "ebr.h" #include "dn-stdio.h" +#ifdef FEATURE_PERFMAP +#include "perfmap.h" +#endif #ifdef FEATURE_COMINTEROP #include "runtimecallablewrapper.h" @@ -154,7 +157,11 @@ bool FinalizerThread::HaveExtraWorkForFinalizer() || YieldProcessorNormalization::IsMeasurementScheduled() || HasDelayedDynamicMethod() || ThreadStore::s_pThreadStore->ShouldTriggerGCForDeadThreads() - || g_EbrCollector.CleanUpRequested(); + || g_EbrCollector.CleanUpRequested() +#ifdef FEATURE_PERFMAP + || PerfMap::HasDeferredEntries() +#endif + ; #endif // TARGET_WASM } @@ -210,6 +217,14 @@ static void DoExtraWorkForFinalizer(Thread* finalizerThread) GCX_PREEMP(); g_EbrCollector.CleanUpPending(); } + +#ifdef FEATURE_PERFMAP + if (PerfMap::HasDeferredEntries()) + { + GCX_PREEMP(); + PerfMap::DrainDeferredEntries(); + } +#endif } OBJECTREF FinalizerThread::GetNextFinalizableObject() diff --git a/src/coreclr/vm/perfmap.cpp b/src/coreclr/vm/perfmap.cpp index d51c75dbe35f01..a04ddb34a7b4da 100644 --- a/src/coreclr/vm/perfmap.cpp +++ b/src/coreclr/vm/perfmap.cpp @@ -36,6 +36,9 @@ bool PerfMap::s_IndividualAllocationStubReporting = false; unsigned PerfMap::s_StubsMapped = 0; CrstStatic PerfMap::s_csPerfMap; +CrstStatic PerfMap::s_csPerfMapDeferred; +PerfMapDeferredEntry * PerfMap::s_pDeferredHead = nullptr; +PerfMapDeferredEntry * PerfMap::s_pDeferredTail = nullptr; bool PerfMapLowGranularityStubs() { @@ -48,6 +51,7 @@ void PerfMap::Initialize() LIMITED_METHOD_CONTRACT; s_csPerfMap.Init(CrstPerfMap); + s_csPerfMapDeferred.Init(CrstPerfMapDeferredActions, CRST_UNSAFE_ANYMODE); PerfMapType perfMapType = (PerfMapType)CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_PerfMapEnabled); PerfMap::Enable(perfMapType, false); @@ -79,9 +83,49 @@ void PerfMap::InitializeConfiguration() s_IndividualAllocationStubReporting = (granularity & 2) != 0; } +// Replay all deferred entries under s_csPerfMap. Caller must already hold s_csPerfMap. +void PerfMap::ReplayDeferredEntries() +{ + STANDARD_VM_CONTRACT; + + _ASSERTE(s_csPerfMap.OwnedByCurrentThread()); + + CrstHolder chDeferred(&s_csPerfMapDeferred); + + PerfMapDeferredEntry * pEntry = s_pDeferredHead; + s_pDeferredHead = nullptr; + s_pDeferredTail = nullptr; + + while (pEntry != nullptr) + { + PerfMapDeferredEntry * pNext = pEntry->next; + + if (s_Current != nullptr) + { + s_Current->WriteLine(pEntry->line); + } + + if (pEntry->timestamp != 0) + { + PAL_PerfJitDump_LogMethodWithTimestamp( + (void*)pEntry->pCode, + pEntry->codeSize, + pEntry->name.GetUTF8(), + nullptr, + nullptr, + pEntry->timestamp, + pEntry->codeBuffer, + pEntry->codeBufferSize); + } + + delete pEntry; + pEntry = pNext; + } +} + void PerfMap::Enable(PerfMapType type, bool sendExisting) { - LIMITED_METHOD_CONTRACT; + STANDARD_VM_CONTRACT; if (type == PerfMapType::DISABLED) { @@ -90,6 +134,7 @@ void PerfMap::Enable(PerfMapType type, bool sendExisting) { CrstHolder ch(&(s_csPerfMap)); + ReplayDeferredEntries(); const char* basePath = InternalConstructPath(); @@ -207,6 +252,7 @@ void PerfMap::Disable() if (s_enabled) { CrstHolder ch(&(s_csPerfMap)); + ReplayDeferredEntries(); s_enabled = false; if (s_Current != nullptr) @@ -220,6 +266,21 @@ void PerfMap::Disable() } } +bool PerfMap::HasDeferredEntries() +{ + LIMITED_METHOD_CONTRACT; + + return s_pDeferredHead != nullptr; +} + +void PerfMap::DrainDeferredEntries() +{ + STANDARD_VM_CONTRACT; + + CrstHolder ch(&s_csPerfMap); + ReplayDeferredEntries(); +} + // Signal that all dependencies (AppDomain, ExecutionManager) are ready. // This method must be called before any code is JITed or restored from R2R image. void PerfMap::SignalDependenciesReady() @@ -294,16 +355,7 @@ void PerfMap::WriteLine(SString& line) void PerfMap::LogJITCompiledMethod(MethodDesc * pMethod, PCODE pCode, size_t codeSize, PrepareCodeConfig *pConfig) { - LIMITED_METHOD_CONTRACT; - - CONTRACTL{ - THROWS; - GC_NOTRIGGER; - MODE_PREEMPTIVE; - PRECONDITION(pMethod != nullptr); - PRECONDITION(pCode != nullptr); - PRECONDITION(codeSize > 0); - } CONTRACTL_END; + STANDARD_VM_CONTRACT; if (!s_enabled) { @@ -333,6 +385,7 @@ void PerfMap::LogJITCompiledMethod(MethodDesc * pMethod, PCODE pCode, size_t cod { CrstHolder ch(&(s_csPerfMap)); + ReplayDeferredEntries(); if(s_Current != nullptr) { @@ -349,7 +402,7 @@ void PerfMap::LogJITCompiledMethod(MethodDesc * pMethod, PCODE pCode, size_t cod // Log a pre-compiled method to the perfmap. void PerfMap::LogPreCompiledMethod(MethodDesc * pMethod, PCODE pCode) { - LIMITED_METHOD_CONTRACT; + STANDARD_VM_CONTRACT; if (!s_enabled) { @@ -380,12 +433,14 @@ void PerfMap::LogPreCompiledMethod(MethodDesc * pMethod, PCODE pCode) if (methodRegionInfo.hotSize > 0) { CrstHolder ch(&(s_csPerfMap)); + ReplayDeferredEntries(); PAL_PerfJitDump_LogMethod((void*)methodRegionInfo.hotStartAddress, methodRegionInfo.hotSize, name.GetUTF8(), nullptr, nullptr, /*reportCodeBlock*/true); } if (methodRegionInfo.coldSize > 0) { CrstHolder ch(&(s_csPerfMap)); + ReplayDeferredEntries(); if (s_ShowOptimizationTiers) { @@ -442,13 +497,20 @@ void PerfMap::LogStubs(const char* stubType, const char* stubOwner, PCODE pCode, SString line; line.Printf(FMT_CODE_ADDR " %x %s\n", pCode, codeSize, name.GetUTF8()); + // Queue the operation under s_csPerfMapDeferred instead of taking s_csPerfMap + // directly. LogStubs may be called while an ANYMODE lock is held, and s_csPerfMap + // is a DEFAULT lock that may trigger a GC mode transition. + PerfMapDeferredEntry * pEntry = new PerfMapDeferredEntry(); + pEntry->name.Set(name); + pEntry->line.Set(line); + pEntry->pCode = pCode; + pEntry->codeSize = codeSize; + + // Only capture timestamp and code buffer when JitDump is active. + // These are expensive operations that are unnecessary for perfmap-only mode. + if (PAL_PerfJitDump_IsStarted()) { - CrstHolder ch(&(s_csPerfMap)); - - if(s_Current != nullptr) - { - s_Current->WriteLine(line); - } + pEntry->timestamp = PAL_PerfJitDump_GetTimeStamp(); // For block-level stub allocations, the memory may be reserved but not yet committed. // Emitting code bytes in that case can cause jitdump logging to fail, and the bytes @@ -457,7 +519,36 @@ void PerfMap::LogStubs(const char* stubType, const char* stubOwner, PCODE pCode, // Even when the memory is committed, block-level stubs are reported at commit time // before the actual stub code has been written, so the code bytes would be zeros or // uninitialized. We therefore skip code bytes for block allocations entirely. - PAL_PerfJitDump_LogMethod((void*)pCode, codeSize, name.GetUTF8(), nullptr, nullptr, /*reportCodeBlock*/ stubAllocationType != PerfMapStubType::Block); + if (stubAllocationType != PerfMapStubType::Block) + { + pEntry->codeBufferSize = codeSize; + pEntry->codeBuffer = new BYTE[codeSize]; + memcpy(pEntry->codeBuffer, (void*)pCode, codeSize); + } + else + { + pEntry->codeBufferSize = 0; + } + } + else + { + pEntry->timestamp = 0; + pEntry->codeBufferSize = 0; + } + pEntry->next = nullptr; + + { + CrstHolder chDeferred(&s_csPerfMapDeferred); + + if (s_pDeferredTail != nullptr) + { + s_pDeferredTail->next = pEntry; + } + else + { + s_pDeferredHead = pEntry; + } + s_pDeferredTail = pEntry; } } EX_CATCH{} EX_END_CATCH diff --git a/src/coreclr/vm/perfmap.h b/src/coreclr/vm/perfmap.h index 4dc23f65e107db..1a19037de365bf 100644 --- a/src/coreclr/vm/perfmap.h +++ b/src/coreclr/vm/perfmap.h @@ -19,6 +19,19 @@ enum class PerfMapStubType Individual }; +struct PerfMapDeferredEntry +{ + SString name; + SString line; + PCODE pCode; + size_t codeSize; + uint64_t timestamp; + NewArrayHolder codeBuffer; + size_t codeBufferSize; + + PerfMapDeferredEntry * next; +}; + class PerfMap { private: @@ -41,6 +54,11 @@ class PerfMap static unsigned s_StubsMapped; static CrstStatic s_csPerfMap; + static CrstStatic s_csPerfMapDeferred; + + // Queue of deferred operations to replay under s_csPerfMap. + static PerfMapDeferredEntry * s_pDeferredHead; + static PerfMapDeferredEntry * s_pDeferredTail; // The file stream to write the map to. FILE * m_fp; @@ -62,6 +80,9 @@ class PerfMap static void InitializeConfiguration(); + // Replay all deferred entries. Must be called while holding s_csPerfMap. + static void ReplayDeferredEntries(); + protected: // Open the perf map file for write. void OpenFile(SString& path); @@ -107,6 +128,12 @@ class PerfMap // Close the map and flush any remaining data. static void Disable(); + // Drain any deferred entries. Called from the finalizer thread's extra work path. + static void DrainDeferredEntries(); + + // Returns true if there are deferred entries waiting to be drained. + static bool HasDeferredEntries(); + // Signal that all dependencies (AppDomain, ExecutionManager) are ready. static void SignalDependenciesReady();