Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/coreclr/inc/CrstTypes.def
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,10 @@ End

// Ordering rules for the PerfMap Crst: it is declared as acquired after
// CodeVersioning and AssemblyList, and as acquired before PerfMapDeferredActions.
Crst PerfMap
AcquiredAfter CodeVersioning AssemblyList
AcquiredBefore PerfMapDeferredActions
End

// PerfMapDeferredActions declares no ordering rules of its own here; the only
// constraint visible in this hunk is PerfMap's "AcquiredBefore PerfMapDeferredActions".
Crst PerfMapDeferredActions
End

Crst InterfaceDispatchGlobalLists
Expand Down
77 changes: 40 additions & 37 deletions src/coreclr/inc/crsttypes_generated.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,41 +85,42 @@ enum CrstType
CrstPEImage = 67,
CrstPendingTypeLoadEntry = 68,
CrstPerfMap = 69,
CrstPgoData = 70,
CrstPinnedByrefValidation = 71,
CrstPinnedHeapHandleTable = 72,
CrstPregeneratedStringThunks = 73,
CrstProfilerGCRefDataFreeList = 74,
CrstProfilingAPIStatus = 75,
CrstRCWCache = 76,
CrstRCWCleanupList = 77,
CrstReadyToRunEntryPointToMethodDescMap = 78,
CrstReflection = 79,
CrstReJITGlobalRequest = 80,
CrstSigConvert = 81,
CrstSingleUseLock = 82,
CrstStressLog = 83,
CrstStubCache = 84,
CrstStubDispatchCache = 85,
CrstSyncBlockCache = 86,
CrstSyncHashLock = 87,
CrstSystemDomain = 88,
CrstSystemDomainDelayedUnloadList = 89,
CrstThreadIdDispenser = 90,
CrstThreadLocalStorageLock = 91,
CrstThreadStore = 92,
CrstTieredCompilation = 93,
CrstTypeEquivalenceMap = 94,
CrstTypeIDMap = 95,
CrstUMEntryThunkCache = 96,
CrstUMEntryThunkFreeListLock = 97,
CrstUniqueStack = 98,
CrstUnresolvedClassLock = 99,
CrstUnwindInfoTablePendingLock = 100,
CrstUnwindInfoTablePublishLock = 101,
CrstVSDIndirectionCellLock = 102,
CrstWrapperTemplate = 103,
kNumberOfCrstTypes = 104
CrstPerfMapDeferredActions = 70,
CrstPgoData = 71,
CrstPinnedByrefValidation = 72,
CrstPinnedHeapHandleTable = 73,
CrstPregeneratedStringThunks = 74,
CrstProfilerGCRefDataFreeList = 75,
CrstProfilingAPIStatus = 76,
CrstRCWCache = 77,
CrstRCWCleanupList = 78,
CrstReadyToRunEntryPointToMethodDescMap = 79,
CrstReflection = 80,
CrstReJITGlobalRequest = 81,
CrstSigConvert = 82,
CrstSingleUseLock = 83,
CrstStressLog = 84,
CrstStubCache = 85,
CrstStubDispatchCache = 86,
CrstSyncBlockCache = 87,
CrstSyncHashLock = 88,
CrstSystemDomain = 89,
CrstSystemDomainDelayedUnloadList = 90,
CrstThreadIdDispenser = 91,
CrstThreadLocalStorageLock = 92,
CrstThreadStore = 93,
CrstTieredCompilation = 94,
CrstTypeEquivalenceMap = 95,
CrstTypeIDMap = 96,
CrstUMEntryThunkCache = 97,
CrstUMEntryThunkFreeListLock = 98,
CrstUniqueStack = 99,
CrstUnresolvedClassLock = 100,
CrstUnwindInfoTablePendingLock = 101,
CrstUnwindInfoTablePublishLock = 102,
CrstVSDIndirectionCellLock = 103,
CrstWrapperTemplate = 104,
kNumberOfCrstTypes = 105
};

#endif // __CRST_TYPES_INCLUDED
Expand All @@ -131,7 +132,7 @@ enum CrstType
int g_rgCrstLevelMap[] =
{
9, // CrstAppDomainCache
2, // CrstAssemblyList
3, // CrstAssemblyList
13, // CrstAssemblyLoader
2, // CrstAsyncContinuations
3, // CrstAvailableClass
Expand Down Expand Up @@ -199,7 +200,8 @@ int g_rgCrstLevelMap[] =
0, // CrstNotifyGdb
4, // CrstPEImage
20, // CrstPendingTypeLoadEntry
0, // CrstPerfMap
2, // CrstPerfMap
0, // CrstPerfMapDeferredActions
3, // CrstPgoData
0, // CrstPinnedByrefValidation
15, // CrstPinnedHeapHandleTable
Expand Down Expand Up @@ -309,6 +311,7 @@ LPCSTR g_rgCrstNameMap[] =
"CrstPEImage",
"CrstPendingTypeLoadEntry",
"CrstPerfMap",
"CrstPerfMapDeferredActions",
"CrstPgoData",
"CrstPinnedByrefValidation",
"CrstPinnedHeapHandleTable",
Expand Down
12 changes: 12 additions & 0 deletions src/coreclr/pal/inc/pal.h
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,18 @@ PALAPI
// Log a method to the jitdump file.
PAL_PerfJitDump_LogMethod(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo, bool reportCodeBlock);

PALIMPORT
int
PALAPI
// Log a method to the jitdump file with a pre-captured timestamp and code buffer.
//   pCode, codeSize            - address and size recorded for the method.
//   symbol                     - NUL-terminated method name.
//   debugInfo, unwindInfo      - accepted for signature parity with PAL_PerfJitDump_LogMethod.
//   timestamp                  - timestamp to store in the record (see PAL_PerfJitDump_GetTimeStamp).
//   codeBuffer, codeBufferSize - caller-provided bytes to emit as the code body of the record.
PAL_PerfJitDump_LogMethodWithTimestamp(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo, uint64_t timestamp, void* codeBuffer, size_t codeBufferSize);

PALIMPORT
uint64_t
PALAPI
// Get the current timestamp in the same format used by jitdump records.
// The returned value has the same type as the 'timestamp' argument of
// PAL_PerfJitDump_LogMethodWithTimestamp.
PAL_PerfJitDump_GetTimeStamp();

PALIMPORT
int
PALAPI
Expand Down
108 changes: 108 additions & 0 deletions src/coreclr/pal/src/misc/perfjitdump.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,86 @@ struct PerfJitDumpState
return 0;
}

// Writes one code-load record to the jitdump file, using a timestamp and a
// code buffer supplied by the caller instead of sampling them here.
//
// Parameters:
//   pCode          - method address; stored in the record as both vma and code_addr.
//   codeSize       - stored in the record as code_size.
//   symbol         - NUL-terminated name, written (terminator included) right after the record.
//   debugInfo      - not used by this function.
//   unwindInfo     - not used by this function.
//   timestamp      - stored unchanged as the record header timestamp.
//   codeBuffer     - bytes written after the symbol.
//   codeBufferSize - length of codeBuffer in bytes; included in the record's total_size.
//
// Returns 0 when logging is disabled or once the whole record has been written;
// returns FatalError() when writev fails with an error other than EINTR.
int LogMethodWithTimestamp(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo, uint64_t timestamp, void* codeBuffer, size_t codeBufferSize)
{
    int result = 0;

    if (!enabled)
        return 0;

    size_t symbolLen = strlen(symbol);

    JitCodeLoadRecord record;

    // Total bytes to emit: fixed-size record + symbol with its terminator + code bytes.
    size_t bytesRemaining = sizeof(JitCodeLoadRecord) + symbolLen + 1 + codeBufferSize;

    record.header.timestamp = timestamp;
    record.vma = (uint64_t) pCode;
    record.code_addr = (uint64_t) pCode;
    record.code_size = codeSize;
    record.header.total_size = bytesRemaining;

    iovec items[] = {
        { &record, sizeof(JitCodeLoadRecord) },
        { (void *)symbol, symbolLen + 1 },
        { codeBuffer, codeBufferSize },
    };
    size_t itemsCount = sizeof(items) / sizeof(items[0]);

    size_t itemsWritten = 0;

    // The flag is checked a second time immediately before the record is emitted.
    if (!enabled)
        return 0;

    // Increment codeIndex while locked
    // NOTE(review): this function does not acquire a lock itself; presumably the
    // caller serializes access to codeIndex and fd -- confirm against the call sites.
    record.code_index = ++codeIndex;

    do
    {
        result = writev(fd, items + itemsWritten, itemsCount - itemsWritten);

        // Everything that was still pending has been written.
        if ((size_t)result == bytesRemaining)
            break;

        if (result == -1)
        {
            // Interrupted before any data was written: retry the same request.
            if (errno == EINTR)
                continue;

            return FatalError();
        }

        // Detect unexpected failure cases.
        _ASSERTE(bytesRemaining > (size_t)result);
        _ASSERTE(result > 0);

        // Handle partial write case

        bytesRemaining -= result;

        // Skip the iovec entries that were fully written and trim the first
        // partially written one so the next writev resumes where this one stopped.
        do
        {
            if ((size_t)result < items[itemsWritten].iov_len)
            {
                items[itemsWritten].iov_len -= result;
                items[itemsWritten].iov_base = (void*)((size_t) items[itemsWritten].iov_base + result);
                break;
            }
            else
            {
                result -= items[itemsWritten].iov_len;
                itemsWritten++;

                // Detect unexpected failure case.
                _ASSERTE(itemsWritten < itemsCount);
            }
        } while (result > 0);
    } while (true);

    return 0;
}

int Finish()
{
int result = 0;
Expand Down Expand Up @@ -394,6 +474,20 @@ PAL_PerfJitDump_LogMethod(void* pCode, size_t codeSize, const char* symbol, void
return GetState().LogMethod(pCode, codeSize, symbol, debugInfo, unwindInfo, reportCodeBlock);
}

int
PALAPI
PAL_PerfJitDump_LogMethodWithTimestamp(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo, uint64_t timestamp, void* codeBuffer, size_t codeBufferSize)
{
    // PAL entry point: pass every argument, unchanged and in order, to the
    // LogMethodWithTimestamp method of the object returned by GetState().
    auto&& state = GetState();
    const int status = state.LogMethodWithTimestamp(
        pCode, codeSize, symbol, debugInfo, unwindInfo, timestamp, codeBuffer, codeBufferSize);
    return status;
}

uint64_t
PALAPI
PAL_PerfJitDump_GetTimeStamp()
{
    // PAL entry point that exposes the value produced by GetTimeStampNS().
    const uint64_t now = GetTimeStampNS();
    return now;
}

int
PALAPI
PAL_PerfJitDump_Finish()
Expand Down Expand Up @@ -424,6 +518,20 @@ PAL_PerfJitDump_LogMethod(void* pCode, size_t codeSize, const char* symbol, void
return 0;
}

int
PALAPI
PAL_PerfJitDump_LogMethodWithTimestamp(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo, uint64_t timestamp, void* codeBuffer, size_t codeBufferSize)
{
    // No-op variant: every argument is ignored, nothing is logged, and the
    // result is always 0.
    const int noOpStatus = 0;
    return noOpStatus;
}

uint64_t
PALAPI
PAL_PerfJitDump_GetTimeStamp()
{
    // No-op variant: always returns 0.
    const uint64_t noTimestamp = 0;
    return noTimestamp;
}

int
PALAPI
PAL_PerfJitDump_Finish()
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/vm/eventing/eventpipe/ds-rt-coreclr.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ static
uint32_t
ds_rt_enable_perfmap (uint32_t type)
{
LIMITED_METHOD_CONTRACT;
STANDARD_VM_CONTRACT;

#ifdef FEATURE_PERFMAP
PerfMap::PerfMapType perfMapType = (PerfMap::PerfMapType)type;
Expand Down
17 changes: 16 additions & 1 deletion src/coreclr/vm/finalizerthread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
#include "eventpipeadapter.h"
#include "ebr.h"
#include "dn-stdio.h"
#ifdef FEATURE_PERFMAP
#include "perfmap.h"
#endif

#ifdef FEATURE_COMINTEROP
#include "runtimecallablewrapper.h"
Expand Down Expand Up @@ -154,7 +157,11 @@ bool FinalizerThread::HaveExtraWorkForFinalizer()
|| YieldProcessorNormalization::IsMeasurementScheduled()
|| HasDelayedDynamicMethod()
|| ThreadStore::s_pThreadStore->ShouldTriggerGCForDeadThreads()
|| g_EbrCollector.CleanUpRequested();
|| g_EbrCollector.CleanUpRequested()
#ifdef FEATURE_PERFMAP
|| PerfMap::HasDeferredEntries()
#endif
;

#endif // TARGET_WASM
}
Expand Down Expand Up @@ -210,6 +217,14 @@ static void DoExtraWorkForFinalizer(Thread* finalizerThread)
GCX_PREEMP();
g_EbrCollector.CleanUpPending();
}

#ifdef FEATURE_PERFMAP
if (PerfMap::HasDeferredEntries())
{
GCX_PREEMP();
PerfMap::DrainDeferredEntries();
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not clear to me why we need to introduce the deferred logging to fix the lock ordering violation. Is the deferred logging a required part of the fix for the deadlock, or is it trying to be an additional optimization?

If it is required for some reason, are there negative side effects? Is it going to introduce a window where the tools that use perfmap can produce bad stacktraces?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we are working around code like

RETURN GenerateDispatchStubLong(addrOfCode,
that does heavy lifting in cooperative mode, I think it would be better to just switch to preemptive mode.

I am not sure the pMayHaveReenteredCooperativeGCMode tricks in this code are worth it.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it going to introduce a window where the tools that use perfmap can produce bad stacktraces?

In theory I think this could give us missing symbolication of particular stub stack frames, but I don't expect it to impact the overall unwind. While having 100% correct stacks would be the ideal, pragmatically I imagine the experience would be pretty good most of the time as long as the timing delays are short. If we can get the ideal scenario while still staying low enough risk for a servicing fix of course I'd have no complaints :)

}
#endif
}

OBJECTREF FinalizerThread::GetNextFinalizableObject()
Expand Down
Loading
Loading