From 4034dd557288431a374334244edf2b9a7dbe3581 Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Mon, 16 Mar 2026 07:31:43 +0100 Subject: [PATCH 1/6] Keep symbols and capture debug artifacts for native failures --- ci/python-gate.libsonnet | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ci/python-gate.libsonnet b/ci/python-gate.libsonnet index 0ae29d7123..0c560dce1f 100644 --- a/ci/python-gate.libsonnet +++ b/ci/python-gate.libsonnet @@ -142,6 +142,7 @@ BISECT_EMAIL_TO_PATTERN: ".*@oracle.com", TRUFFLE_STRICT_OPTION_DEPRECATION: "true", npm_config_registry: $.overlay_imports.npm_config_registry, + CFLAGS: "-ggdb", }, linux: { common: ENV_POSIX + {}, @@ -241,6 +242,8 @@ "graal_dumps/*/*", "bench-results.json", "raw-results.json", + "mxbuild/*/libpythonvm/libpythonvm.so.debug", + "mxbuild/*/GRAALPY_STANDALONE_COMMON/lib/graalpy*/libpython-native.so", ], //------------------------------------------------------------------------------------------------------------------ From ac6509636771a231f5c3a17bd3e6ef4d26f6a93d Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Mon, 16 Mar 2026 08:35:27 +0100 Subject: [PATCH 2/6] Fix native raw allocator accounting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Correct GraalPy-specific raw allocator bookkeeping in obmalloc.c by charging only the realloc size delta, rolling back reserved bytes when malloc/calloc/realloc fail, and keeping the tracked native-memory total consistent with the actual libc allocation outcome. This fixes a real regression in GraalPy’s custom allocator path while preserving the existing diagnostic work for the NumPy periodic corruption investigation. 
--- .../src/obmalloc.c | 36 ++++++++++++++----- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/graalpython/com.oracle.graal.python.cext/src/obmalloc.c b/graalpython/com.oracle.graal.python.cext/src/obmalloc.c index ae39e1adac..8cffa6e9c2 100644 --- a/graalpython/com.oracle.graal.python.cext/src/obmalloc.c +++ b/graalpython/com.oracle.graal.python.cext/src/obmalloc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -426,10 +426,15 @@ _GraalPyMem_RawMalloc(void *ctx, size_t size) To solve these problems, allocate an extra byte. */ if (size == 0) size = 1; - if (_GraalPyMem_PrepareAlloc((GraalPyMem_t*) ctx, size)) { + GraalPyMem_t *state = (GraalPyMem_t *)ctx; + if (_GraalPyMem_PrepareAlloc(state, size)) { return NULL; } mem_head_t *ptr_with_head = (mem_head_t *)malloc(size + sizeof(mem_head_t)); + if (ptr_with_head == NULL) { + state->allocated_memory -= size; + return NULL; + } ptr_with_head->size = size; return FROM_MEM_HEAD(ptr_with_head); } @@ -450,13 +455,18 @@ _GraalPyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize) elsize = 1; } size_t nbytes = nelem * elsize; - if (_GraalPyMem_PrepareAlloc((GraalPyMem_t*) ctx, nbytes)) { + GraalPyMem_t *state = (GraalPyMem_t *)ctx; + if (_GraalPyMem_PrepareAlloc(state, nbytes)) { return NULL; } /* We cannot use 'calloc' because we need to allocate following layout: [ mem_head_t ] [ e_0 ] [ e_1 ] [ e_2 ] ... 
[ n_nelem ] */ size_t total = nbytes + sizeof(mem_head_t); mem_head_t *ptr_with_head = (mem_head_t *)malloc(total); + if (ptr_with_head == NULL) { + state->allocated_memory -= nbytes; + return NULL; + } memset(ptr_with_head, 0, total); ptr_with_head->size = nbytes; return FROM_MEM_HEAD(ptr_with_head); @@ -481,17 +491,25 @@ _GraalPyMem_RawRealloc(void *ctx, void *ptr, size_t size) old_size = 0; } - // account for the difference in size - if (old_size >= size) { - /* In case of "shrinking", just subtract the counter but don't trigger - the Java GC. */ - state->allocated_memory -= size; - } else if (_GraalPyMem_PrepareAlloc(state, size - old_size)) { + if (old_size < size && _GraalPyMem_PrepareAlloc(state, size - old_size)) { return NULL; } mem_head_t *ptr_with_head = (mem_head_t *)realloc(old, size + sizeof(mem_head_t)); + if (ptr_with_head == NULL) { + if (old_size < size) { + state->allocated_memory -= size - old_size; + } + return NULL; + } + + if (old_size > size) { + /* In case of "shrinking", just subtract the difference but don't + trigger the Java GC. */ + state->allocated_memory -= old_size - size; + } + ptr_with_head->size = size; return FROM_MEM_HEAD(ptr_with_head); } From 389f73f54c3ed661c4ff3f7dc6956845fb9314d9 Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Mon, 16 Mar 2026 09:13:33 +0100 Subject: [PATCH 3/6] Validate raw allocator headers Teach GraalPy's raw native allocator to stamp each allocation with a fixed header magic and to verify that header on realloc and free before trusting the recorded size. This keeps the fast path lightweight, aborts immediately on corrupted or foreign headers, and makes allocator misuse fail at the point where the bad pointer first re-enters obmalloc rather than later in libc free(). 
--- .../src/obmalloc.c | 37 ++++++++++++++++--- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/graalpython/com.oracle.graal.python.cext/src/obmalloc.c b/graalpython/com.oracle.graal.python.cext/src/obmalloc.c index 8cffa6e9c2..4d0669b6d2 100644 --- a/graalpython/com.oracle.graal.python.cext/src/obmalloc.c +++ b/graalpython/com.oracle.graal.python.cext/src/obmalloc.c @@ -56,6 +56,7 @@ typedef struct { /* Get the object given the GC head */ #define FROM_MEM_HEAD(g) ((void *)(((mem_head_t *)g)+1)) +#define GRAALPY_MEM_HEAD_MAGIC ((size_t)0x47505241574D454DULL) #define MAX_COLLECTION_RETRIES (7) #define COLLECTION_DELAY_INCREMENT (50) @@ -73,6 +74,32 @@ typedef struct { static GraalPyMem_t _GraalPyMem_State = { 0, 0, 0 }; +static void +_GraalPyMem_InitHeader(mem_head_t *ptr_with_head, size_t size) +{ + ptr_with_head->size = size; + ptr_with_head->dummy = GRAALPY_MEM_HEAD_MAGIC; +} + +static void +_GraalPyMem_FatalInvalidHeader(const char *func, void *ptr, const mem_head_t *ptr_with_head) +{ + GraalPyPrivate_Log(PY_TRUFFLE_LOG_INFO, + "%s: invalid raw allocation header for ptr=%p head=%p size=%lu dummy=0x%lx\n", + func, ptr, ptr_with_head, (unsigned long) ptr_with_head->size, (unsigned long) ptr_with_head->dummy); + Py_FatalError("invalid GraalPy raw allocation header"); +} + +static mem_head_t * +_GraalPyMem_GetValidatedHead(const char *func, void *ptr) +{ + mem_head_t *ptr_with_head = AS_MEM_HEAD(ptr); + if (UNLIKELY(ptr_with_head->dummy != GRAALPY_MEM_HEAD_MAGIC)) { + _GraalPyMem_FatalInvalidHeader(func, ptr, ptr_with_head); + } + return ptr_with_head; +} + #if 0 // GraalPy change /* bpo-35053: Declare tracemalloc configuration here rather than Modules/_tracemalloc.c because _tracemalloc can be compiled as dynamic @@ -435,7 +462,7 @@ _GraalPyMem_RawMalloc(void *ctx, size_t size) state->allocated_memory -= size; return NULL; } - ptr_with_head->size = size; + _GraalPyMem_InitHeader(ptr_with_head, size); return FROM_MEM_HEAD(ptr_with_head); } @@ -468,7 
+495,7 @@ _GraalPyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize) return NULL; } memset(ptr_with_head, 0, total); - ptr_with_head->size = nbytes; + _GraalPyMem_InitHeader(ptr_with_head, nbytes); return FROM_MEM_HEAD(ptr_with_head); } @@ -484,7 +511,7 @@ _GraalPyMem_RawRealloc(void *ctx, void *ptr, size_t size) size = 1; if (ptr != NULL) { - old = AS_MEM_HEAD(ptr); + old = _GraalPyMem_GetValidatedHead(__func__, ptr); old_size = old->size; } else { old = NULL; @@ -510,7 +537,7 @@ _GraalPyMem_RawRealloc(void *ctx, void *ptr, size_t size) state->allocated_memory -= old_size - size; } - ptr_with_head->size = size; + _GraalPyMem_InitHeader(ptr_with_head, size); return FROM_MEM_HEAD(ptr_with_head); } @@ -524,7 +551,7 @@ _GraalPyMem_RawFree(void *ctx, void *ptr) if (ptr == NULL) return; GraalPyMem_t *state = (GraalPyMem_t *)ctx; - mem_head_t *ptr_with_head = AS_MEM_HEAD(ptr); + mem_head_t *ptr_with_head = _GraalPyMem_GetValidatedHead(__func__, ptr); const size_t size = ptr_with_head->size; if (state->allocated_memory < size) { GraalPyPrivate_Log(PY_TRUFFLE_LOG_INFO, From aae4f89764beb300f097ecfdfde4219617b528f3 Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Mon, 16 Mar 2026 09:21:42 +0100 Subject: [PATCH 4/6] Add optional raw allocator diagnostics Add two experimental GraalPy expert options that are threaded into the native C API option bitmask to enable heavier allocator debugging only when requested. With these options enabled, obmalloc can poison freed raw-allocation blocks and record a lightweight rolling history of raw allocation sites, while keeping the new diagnostics off the default path unless an investigation explicitly turns them on. 
--- .../com.oracle.graal.python.cext/src/capi.h | 10 +- .../src/obmalloc.c | 133 +++++++++++++++++- .../modules/cext/PythonCextBuiltins.java | 10 +- .../graal/python/runtime/PythonOptions.java | 6 + 4 files changed, 152 insertions(+), 7 deletions(-) diff --git a/graalpython/com.oracle.graal.python.cext/src/capi.h b/graalpython/com.oracle.graal.python.cext/src/capi.h index 4323916fdb..5b791f0a9e 100644 --- a/graalpython/com.oracle.graal.python.cext/src/capi.h +++ b/graalpython/com.oracle.graal.python.cext/src/capi.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -96,6 +96,8 @@ #define PY_TRUFFLE_LOG_FINEST 0x20 #define PY_TRUFFLE_DEBUG_CAPI 0x40 #define PY_TRUFFLE_PYTHON_GC 0x80 +#define PY_TRUFFLE_POISON_NATIVE_MEMORY_ON_FREE 0x100 +#define PY_TRUFFLE_SAMPLE_NATIVE_MEMORY_ALLOC_SITES 0x200 typedef struct mmap_object mmap_object; typedef struct _gc_runtime_state GCState; // originally in 'gcmodule.c' @@ -232,6 +234,12 @@ static MUST_INLINE int GraalPyPrivate_Debug_CAPI() { static MUST_INLINE int GraalPyPrivate_PythonGC() { return Py_Truffle_Options & PY_TRUFFLE_PYTHON_GC; } +static MUST_INLINE int GraalPyPrivate_PoisonNativeMemoryOnFree() { + return Py_Truffle_Options & PY_TRUFFLE_POISON_NATIVE_MEMORY_ON_FREE; +} +static MUST_INLINE int GraalPyPrivate_SampleNativeMemoryAllocSites() { + return Py_Truffle_Options & PY_TRUFFLE_SAMPLE_NATIVE_MEMORY_ALLOC_SITES; +} static void GraalPyPrivate_Log(int level, const char *format, ...) 
diff --git a/graalpython/com.oracle.graal.python.cext/src/obmalloc.c b/graalpython/com.oracle.graal.python.cext/src/obmalloc.c index 4d0669b6d2..eb27176f9f 100644 --- a/graalpython/com.oracle.graal.python.cext/src/obmalloc.c +++ b/graalpython/com.oracle.graal.python.cext/src/obmalloc.c @@ -40,6 +40,9 @@ */ #include "capi.h" #include "pycore_pymem.h" +#ifdef MS_WINDOWS +#include +#endif /* * This header needs to be 16 bytes long to ensure that allocations will still be aligned to 16 byte boundaries. @@ -50,6 +53,15 @@ typedef struct { size_t dummy; } mem_head_t; +typedef struct { + void *ptr; + void *stack[12]; + size_t size; + size_t depth; + unsigned long long serial; + char operation; +} GraalPyMemSample_t; + /* Get an object's GC head */ #define AS_MEM_HEAD(o) ((mem_head_t *)(o)-1) @@ -57,6 +69,12 @@ typedef struct { #define FROM_MEM_HEAD(g) ((void *)(((mem_head_t *)g)+1)) #define GRAALPY_MEM_HEAD_MAGIC ((size_t)0x47505241574D454DULL) +#define GRAALPY_MEM_HEAD_POISON ((size_t)0xDDDDBAD0DDDDBAD0ULL) +#define GRAALPY_MEM_SAMPLE_RING_SIZE (4096) +#define GRAALPY_MEM_SAMPLE_HISTORY (8) +#define GRAALPY_MEM_SAMPLE_STACK_DEPTH (12) +#define GRAALPY_MEM_SAMPLE_STACK_SKIP (2) +#define GRAALPY_MEM_SAMPLE_USEFUL_DEPTH (10) #define MAX_COLLECTION_RETRIES (7) #define COLLECTION_DELAY_INCREMENT (50) @@ -73,6 +91,86 @@ typedef struct { } GraalPyMem_t; static GraalPyMem_t _GraalPyMem_State = { 0, 0, 0 }; +static GraalPyMemSample_t _GraalPyMem_Samples[GRAALPY_MEM_SAMPLE_RING_SIZE] = {{0}}; +static unsigned long long _GraalPyMem_SampleSerial = 0; +static size_t _GraalPyMem_SampleIndex = 0; + +static MUST_INLINE int +_GraalPyMem_PoisonOnFreeEnabled(void) +{ + return GraalPyPrivate_PoisonNativeMemoryOnFree(); +} + +static MUST_INLINE int +_GraalPyMem_SampleAllocSitesEnabled(void) +{ + return GraalPyPrivate_SampleNativeMemoryAllocSites(); +} + +static void +_GraalPyMem_CaptureSampleStack(GraalPyMemSample_t *sample) +{ +#if (__linux__ && __GNU_LIBRARY__) + void 
*frames[GRAALPY_MEM_SAMPLE_STACK_DEPTH]; + int depth = backtrace(frames, GRAALPY_MEM_SAMPLE_STACK_DEPTH); + size_t start = depth > GRAALPY_MEM_SAMPLE_STACK_SKIP ? GRAALPY_MEM_SAMPLE_STACK_SKIP : (size_t) depth; + sample->depth = (size_t) depth - start; + if (sample->depth > GRAALPY_MEM_SAMPLE_USEFUL_DEPTH) { + sample->depth = GRAALPY_MEM_SAMPLE_USEFUL_DEPTH; + } + memcpy(sample->stack, frames + start, sample->depth * sizeof(void *)); +#elif defined(MS_WINDOWS) + sample->depth = (size_t) CaptureStackBackTrace(GRAALPY_MEM_SAMPLE_STACK_SKIP, + GRAALPY_MEM_SAMPLE_USEFUL_DEPTH, sample->stack, NULL); +#else + sample->depth = 0; +#endif +} + +static void +_GraalPyMem_RecordSample(char operation, void *ptr, size_t size) +{ + if (UNLIKELY(ptr == NULL)) { + return; + } + if (LIKELY(!_GraalPyMem_SampleAllocSitesEnabled())) { + return; + } + + size_t index = _GraalPyMem_SampleIndex++ % GRAALPY_MEM_SAMPLE_RING_SIZE; + GraalPyMemSample_t *sample = &_GraalPyMem_Samples[index]; + sample->ptr = ptr; + sample->size = size; + sample->serial = ++_GraalPyMem_SampleSerial; + sample->operation = operation; + _GraalPyMem_CaptureSampleStack(sample); +} + +static void +_GraalPyMem_LogRecentSamples(const char *func, void *ptr) +{ + if (LIKELY(!_GraalPyMem_SampleAllocSitesEnabled())) { + return; + } + + size_t next_index = _GraalPyMem_SampleIndex; + int printed = 0; + for (size_t offset = 0; offset < GRAALPY_MEM_SAMPLE_RING_SIZE && printed < GRAALPY_MEM_SAMPLE_HISTORY; offset++) { + size_t index = (next_index + GRAALPY_MEM_SAMPLE_RING_SIZE - offset - 1) % GRAALPY_MEM_SAMPLE_RING_SIZE; + const GraalPyMemSample_t *sample = &_GraalPyMem_Samples[index]; + if (sample->ptr == ptr && sample->serial != 0) { + GraalPyPrivate_Log(PY_TRUFFLE_LOG_INFO, + "%s: recent raw memory sample #%llu op=%c ptr=%p size=%lu depth=%lu\n", + func, sample->serial, sample->operation, sample->ptr, (unsigned long) sample->size, (unsigned long) sample->depth); + for (size_t frame_index = 0; frame_index < sample->depth; 
frame_index++) { + GraalPyPrivate_Log(PY_TRUFFLE_LOG_INFO, + "%s: sample #%llu frame[%lu]=%p\n", + func, sample->serial, (unsigned long) frame_index, sample->stack[frame_index]); + } + printed++; + } + } +} static void _GraalPyMem_InitHeader(mem_head_t *ptr_with_head, size_t size) @@ -81,12 +179,28 @@ _GraalPyMem_InitHeader(mem_head_t *ptr_with_head, size_t size) ptr_with_head->dummy = GRAALPY_MEM_HEAD_MAGIC; } +static void +_GraalPyMem_PoisonBlock(mem_head_t *ptr_with_head, size_t size) +{ + if (LIKELY(!_GraalPyMem_PoisonOnFreeEnabled())) { + return; + } + + memset(ptr_with_head, 0xDB, sizeof(mem_head_t) + size); + ptr_with_head->size = GRAALPY_MEM_HEAD_POISON; + ptr_with_head->dummy = GRAALPY_MEM_HEAD_POISON; +} + static void _GraalPyMem_FatalInvalidHeader(const char *func, void *ptr, const mem_head_t *ptr_with_head) { + const char *reason = (ptr_with_head->size == GRAALPY_MEM_HEAD_POISON && ptr_with_head->dummy == GRAALPY_MEM_HEAD_POISON) + ? "poisoned raw allocation header" + : "invalid raw allocation header"; GraalPyPrivate_Log(PY_TRUFFLE_LOG_INFO, - "%s: invalid raw allocation header for ptr=%p head=%p size=%lu dummy=0x%lx\n", - func, ptr, ptr_with_head, (unsigned long) ptr_with_head->size, (unsigned long) ptr_with_head->dummy); + "%s: %s for ptr=%p head=%p size=%lu dummy=0x%lx\n", + func, reason, ptr, ptr_with_head, (unsigned long) ptr_with_head->size, (unsigned long) ptr_with_head->dummy); + _GraalPyMem_LogRecentSamples(func, ptr); Py_FatalError("invalid GraalPy raw allocation header"); } @@ -352,7 +466,9 @@ PyMem_RawMalloc(size_t size) */ if (size > (size_t)PY_SSIZE_T_MAX) return NULL; - return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size); + void *ptr = _PyMem_Raw.malloc(_PyMem_Raw.ctx, size); + _GraalPyMem_RecordSample('m', ptr, size == 0 ? 
1 : size); + return ptr; } void * @@ -361,7 +477,10 @@ PyMem_RawCalloc(size_t nelem, size_t elsize) /* see PyMem_RawMalloc() */ if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize) return NULL; - return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize); + void *ptr = _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize); + size_t nbytes = (nelem == 0 || elsize == 0) ? 1 : nelem * elsize; + _GraalPyMem_RecordSample('c', ptr, nbytes); + return ptr; } void* @@ -370,11 +489,14 @@ PyMem_RawRealloc(void *ptr, size_t new_size) /* see PyMem_RawMalloc() */ if (new_size > (size_t)PY_SSIZE_T_MAX) return NULL; - return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size); + void *new_ptr = _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size); + _GraalPyMem_RecordSample('r', new_ptr, new_size == 0 ? 1 : new_size); + return new_ptr; } void PyMem_RawFree(void *ptr) { + _GraalPyMem_RecordSample('f', ptr, 0); _PyMem_Raw.free(_PyMem_Raw.ctx, ptr); } @@ -560,5 +682,6 @@ _GraalPyMem_RawFree(void *ctx, void *ptr) state->allocated_memory = size; } state->allocated_memory -= size; + _GraalPyMem_PoisonBlock(ptr_with_head, size); free(ptr_with_head); } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java index c0bd0d172f..631acc4906 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * The Universal Permissive License (UPL), Version 1.0 @@ -1643,6 +1643,8 @@ static int doGeneric(@Bind Node inliningTarget) { private static final int LOG_FINEST = 0x20; private static final int DEBUG_CAPI = 0x40; private static final int PYTHON_GC = 0x80; + private static final int POISON_NATIVE_MEMORY_ON_FREE = 0x100; + private static final int SAMPLE_NATIVE_MEMORY_ALLOC_SITES = 0x200; /* * These should be kept so they can be shared across multiple contexts in the same engine, if @@ -1682,6 +1684,12 @@ int getNativeOptions() { if (language.getEngineOption(PythonOptions.PythonGC)) { options |= PYTHON_GC; } + if (language.getEngineOption(PythonOptions.PoisonNativeMemoryOnFree)) { + options |= POISON_NATIVE_MEMORY_ON_FREE; + } + if (language.getEngineOption(PythonOptions.SampleNativeMemoryAllocSites)) { + options |= SAMPLE_NATIVE_MEMORY_ALLOC_SITES; + } return options; } } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonOptions.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonOptions.java index 10f6a8bb03..7928d53441 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonOptions.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonOptions.java @@ -399,6 +399,12 @@ public static void checkBytecodeDSLEnv() { @Option(category = OptionCategory.EXPERT, usageSyntax = "", help = "Initial native memory heap size that triggers a GC (default: 256 MB).") // public static final OptionKey<Long> InitialNativeMemory = new OptionKey<>(1L << 28); + @EngineOption @Option(category = OptionCategory.EXPERT, usageSyntax = "true|false", help = "Poison GraalPy raw allocator headers and payloads before freeing native memory blocks.", stability = OptionStability.EXPERIMENTAL) // + public static final OptionKey<Boolean> PoisonNativeMemoryOnFree = new OptionKey<>(false); + + @EngineOption @Option(category = OptionCategory.EXPERT, usageSyntax = "true|false", 
help = "Record a lightweight rolling history of GraalPy raw native memory allocation sites for allocator debugging.", stability = OptionStability.EXPERIMENTAL) // + public static final OptionKey<Boolean> SampleNativeMemoryAllocSites = new OptionKey<>(false); + @Option(category = OptionCategory.EXPERT, usageSyntax = "true|false", help = "Use the panama backend for NFI.", stability = OptionStability.EXPERIMENTAL) // public static final OptionKey<Boolean> UsePanama = new OptionKey<>(false); // see [GR-67358] From 0cc24a8c74c5f668a04fa977eb15d98b6e4bd2d9 Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Mon, 16 Mar 2026 10:30:11 +0100 Subject: [PATCH 5/6] Add a little helper for debugging benchmarks with native problems --- docs/contributor/CONTRIBUTING.md | 3 +++ mx.graalpython/mx_graalpython_benchmark.py | 13 ++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/docs/contributor/CONTRIBUTING.md b/docs/contributor/CONTRIBUTING.md index 9b509d92ad..84aba00220 100644 --- a/docs/contributor/CONTRIBUTING.md +++ b/docs/contributor/CONTRIBUTING.md @@ -287,6 +287,9 @@ mx benchmark meso:nbody3 \ -Dgraal.MethodFilter=*measure* ``` +For debugging native problems in benchmark runs, there's `BENCHMARK_DEBUG_ARGS` in `mx_graalpython_benchmark.py` to log more stuff for debugging, at the cost of performance. +This is intended for focused reproducer runs on a branch. + ### A note on terminology Note that there may be a little confusion about the configuration names of benchmarks. 
diff --git a/mx.graalpython/mx_graalpython_benchmark.py b/mx.graalpython/mx_graalpython_benchmark.py index 9c1b412c8f..4877be192f 100644 --- a/mx.graalpython/mx_graalpython_benchmark.py +++ b/mx.graalpython/mx_graalpython_benchmark.py @@ -86,6 +86,17 @@ BENCH_BGV = 'benchmarks-bgv' +BENCHMARK_DEBUG_ARGS = ( + # These first two are not /too/ bad for runtime + # '--python.PoisonNativeMemoryOnFree=true', + # '--python.SampleNativeMemoryAllocSites=true', + + # These below can be *extremely* heavy + # '--python.TraceNativeMemory=true', + # '--python.TraceNativeMemoryCalls=true', + # '--log.python.level=FINER', +) + # ---------------------------------------------------------------------------------------------------------------------- # # utils @@ -1062,7 +1073,7 @@ def register_vms(suite, sandboxed_options): def add_graalpy_vm(name, *extra_polyglot_args): graalpy_vms.append((name, extra_polyglot_args)) - python_vm_registry.add_vm(GraalPythonVm(config_name=name, extra_polyglot_args=extra_polyglot_args), suite, 10) + python_vm_registry.add_vm(GraalPythonVm(config_name=name, extra_polyglot_args=BENCHMARK_DEBUG_ARGS + extra_polyglot_args), suite, 10) # GraalPy VMs: add_graalpy_vm(CONFIGURATION_DEFAULT) From a275a32716305174d737cc45c9287414977d5346 Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Tue, 17 Mar 2026 09:44:44 +0100 Subject: [PATCH 6/6] Combine helpers for capturing stack traces and make them try harder to print symbolic names --- .../CMakeLists.txt | 3 +- .../com.oracle.graal.python.cext/src/capi.h | 22 +- .../src/graalpy_stacktrace.c | 279 ++++++++++++++++++ .../src/obmalloc.c | 29 +- 4 files changed, 295 insertions(+), 38 deletions(-) create mode 100644 graalpython/com.oracle.graal.python.cext/src/graalpy_stacktrace.c diff --git a/graalpython/com.oracle.graal.python.cext/CMakeLists.txt b/graalpython/com.oracle.graal.python.cext/CMakeLists.txt index 8a26d9d95e..27df0cb568 100644 --- a/graalpython/com.oracle.graal.python.cext/CMakeLists.txt +++ 
b/graalpython/com.oracle.graal.python.cext/CMakeLists.txt @@ -162,7 +162,7 @@ set(SRC_FILES ${CAPI_SRC}/codecs.c ${CAPI_SRC}/setobject.c ${CAPI_SRC}/compile.c ${CAPI_SRC}/fileobject.c ${CAPI_SRC}/pystrcmp.c ${CAPI_SRC}/getversion.c ${CAPI_SRC}/genobject.c ${CAPI_SRC}/methodobject.c ${CAPI_SRC}/boolobject.c ${CAPI_SRC}/pylifecycle.c ${CAPI_SRC}/errors.c ${CAPI_SRC}/signals.c ${CAPI_SRC}/datetime.c ${CAPI_SRC}/call.c - ${CAPI_SRC}/getargs.c ${CAPI_SRC}/tracemalloc.c ${CAPI_SRC}/initconfig.c + ${CAPI_SRC}/getargs.c ${CAPI_SRC}/tracemalloc.c ${CAPI_SRC}/initconfig.c ${CAPI_SRC}/graalpy_stacktrace.c ) file(GLOB_RECURSE ACTUAL_SRC_FILES @@ -454,6 +454,7 @@ if(WIN32) if (NOT MSVC) target_compile_options(${TARGET_LIBPYTHON} PRIVATE "-fmsc-version=1920") endif() + target_link_libraries(${TARGET_LIBPYTHON} dbghelp) else() # Link to math library; required for functions like 'hypot' or similar target_link_libraries(${TARGET_LIBPYTHON} m) diff --git a/graalpython/com.oracle.graal.python.cext/src/capi.h b/graalpython/com.oracle.graal.python.cext/src/capi.h index 5b791f0a9e..e20945ced8 100644 --- a/graalpython/com.oracle.graal.python.cext/src/capi.h +++ b/graalpython/com.oracle.graal.python.cext/src/capi.h @@ -171,16 +171,7 @@ extern Py_LOCAL_SYMBOL int8_t *_graalpy_finalizing; #if (__linux__ && __GNU_LIBRARY__) #include #include -#include #include -static void print_c_stacktrace() { - fprintf(stderr, "Native stacktrace:\n"); - intptr_t stack[16]; - size_t stack_size = backtrace((void *)stack, sizeof(stack) / sizeof(stack[0])); - backtrace_symbols_fd((void *)stack, stack_size, STDERR_FILENO); - fflush(stderr); -} - static void attach_gdb() { pid_t my_pid = getpid(); char* pathname = "/bin/sh"; @@ -199,15 +190,20 @@ static void attach_gdb() { } } #else -static void print_c_stacktrace() { - // not supported -} - static void attach_gdb() { // not supported } #endif +size_t GraalPyPrivate_CaptureStacktrace(void **frames, size_t max_depth, size_t skip); +void 
GraalPyPrivate_PrintCapturedStacktrace(FILE *file, const char *header, void *const *frames, size_t depth); +void GraalPyPrivate_PrintCurrentStacktrace(FILE *file, const char *header, size_t max_depth, size_t skip); +void GraalPyPrivate_LogCapturedStacktrace(int level, const char *prefix, void *const *frames, size_t depth); + +static void print_c_stacktrace() { + GraalPyPrivate_PrintCurrentStacktrace(stderr, "Native stacktrace:\n", 16, 0); +} + /* Flags definitions representing global (debug) options. */ static MUST_INLINE int GraalPyPrivate_Trace_Memory() { return Py_Truffle_Options & PY_TRUFFLE_TRACE_MEM; diff --git a/graalpython/com.oracle.graal.python.cext/src/graalpy_stacktrace.c b/graalpython/com.oracle.graal.python.cext/src/graalpy_stacktrace.c new file mode 100644 index 0000000000..817a0cc2d3 --- /dev/null +++ b/graalpython/com.oracle.graal.python.cext/src/graalpy_stacktrace.c @@ -0,0 +1,279 @@ +/* + * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * The Universal Permissive License (UPL), Version 1.0 + * + * Subject to the condition set forth below, permission is hereby granted to any + * person obtaining a copy of this software, associated documentation and/or + * data (collectively the "Software"), free of charge and under any and all + * copyright rights in the Software, and any and all patent rights owned or + * freely licensable by each licensor hereunder covering either (i) the + * unmodified Software as contributed to or provided by such licensor, or (ii) + * the Larger Works (as defined below), to deal in both + * + * (a) the Software, and + * + * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if + * one is included with the Software each a "Larger Work" to which the Software + * is contributed by such licensors), + * + * without restriction, including without limitation the rights to copy, create + * derivative works of, display, perform, and distribute the Software and make, + * use, sell, offer for sale, import, export, have made, and have sold the + * Software and the Larger Work(s), and to sublicense the foregoing rights on + * either these or other terms. + * + * This license is subject to the following condition: + * + * The above copyright notice and either this complete permission notice or at a + * minimum a reference to the UPL must be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "capi.h" + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#if defined(MS_WINDOWS) +#include <windows.h> +#include <dbghelp.h> +#elif (defined(__linux__) && defined(__GNU_LIBRARY__)) || defined(__APPLE__) +#include <execinfo.h> +#endif + +#define GRAALPY_NATIVE_STACK_MAX_NAME 1024 +#define GRAALPY_NATIVE_STACK_LINE_BUFFER 2048 + +typedef void (*GraalPyStacktraceWriter)(void *ctx, const char *line); + +static void +render_unavailable_stacktrace(GraalPyStacktraceWriter writer, void *ctx) +{ + writer(ctx, ""); +} + +#if defined(MS_WINDOWS) + +static int +ensure_windows_symbols_initialized(void) +{ + static int initialized = 0; + if (!initialized) { + HANDLE process = GetCurrentProcess(); + SymSetOptions(SymGetOptions() | SYMOPT_LOAD_LINES | SYMOPT_UNDNAME); + if (!SymInitialize(process, NULL, TRUE)) { + return 0; + } + initialized = 1; + } + return 1; +} + +static const char * +windows_basename(const char *path) +{ + const char *slash = strrchr(path, '\\'); + const char *alt = strrchr(path, '/'); + const char *base = slash != NULL ? 
slash + 1 : path; + if (alt != NULL && (slash == NULL || alt > slash)) { + base = alt + 1; + } + return base; +} + +static void +render_windows_stacktrace(GraalPyStacktraceWriter writer, void *ctx, void *const *frames, size_t depth) +{ + HANDLE process = GetCurrentProcess(); + char line[GRAALPY_NATIVE_STACK_LINE_BUFFER]; + char symbol_buffer[sizeof(SYMBOL_INFO) + GRAALPY_NATIVE_STACK_MAX_NAME]; + PSYMBOL_INFO symbol = (PSYMBOL_INFO) symbol_buffer; + + memset(symbol_buffer, 0, sizeof(symbol_buffer)); + symbol->SizeOfStruct = sizeof(SYMBOL_INFO); + symbol->MaxNameLen = GRAALPY_NATIVE_STACK_MAX_NAME - 1; + + if (!ensure_windows_symbols_initialized()) { + for (size_t i = 0; i < depth; i++) { + snprintf(line, sizeof(line), "frame[%lu]: %p", + (unsigned long) i, (void *) frames[i]); + writer(ctx, line); + } + return; + } + + for (size_t i = 0; i < depth; i++) { + DWORD64 address = (DWORD64) (uintptr_t) frames[i]; + DWORD64 displacement = 0; + IMAGEHLP_LINE64 source_line; + DWORD source_displacement = 0; + char module_path[MAX_PATH] = {'\0'}; + const char *module_name = NULL; + HMODULE module = NULL; + + memset(&source_line, 0, sizeof(source_line)); + source_line.SizeOfStruct = sizeof(source_line); + + if (GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + (LPCSTR) frames[i], &module) && GetModuleFileNameA(module, module_path, MAX_PATH) > 0) { + module_name = windows_basename(module_path); + } + + if (SymFromAddr(process, address, &displacement, symbol)) { + if (SymGetLineFromAddr64(process, address, &source_displacement, &source_line)) { + if (module_name != NULL) { + snprintf(line, sizeof(line), "frame[%lu]: %s!%s+0x%llx (%s:%lu) [%p]", + (unsigned long) i, module_name, symbol->Name, (unsigned long long) displacement, + source_line.FileName, (unsigned long) source_line.LineNumber, (void *) frames[i]); + } else { + snprintf(line, sizeof(line), "frame[%lu]: %s+0x%llx (%s:%lu) [%p]", + (unsigned long) i, 
symbol->Name, (unsigned long long) displacement, + source_line.FileName, (unsigned long) source_line.LineNumber, (void *) frames[i]); + } + } else if (module_name != NULL) { + snprintf(line, sizeof(line), "frame[%lu]: %s!%s+0x%llx [%p]", + (unsigned long) i, module_name, symbol->Name, (unsigned long long) displacement, (void *) frames[i]); + } else { + snprintf(line, sizeof(line), "frame[%lu]: %s+0x%llx [%p]", + (unsigned long) i, symbol->Name, (unsigned long long) displacement, (void *) frames[i]); + } + } else if (module_name != NULL) { + snprintf(line, sizeof(line), "frame[%lu]: %s [%p]", + (unsigned long) i, module_name, (void *) frames[i]); + } else { + snprintf(line, sizeof(line), "frame[%lu]: %p", + (unsigned long) i, (void *) frames[i]); + } + writer(ctx, line); + } +} + +#elif (defined(__linux__) && defined(__GNU_LIBRARY__)) || defined(__APPLE__) + +static void +render_execinfo_stacktrace(GraalPyStacktraceWriter writer, void *ctx, void *const *frames, size_t depth) +{ + char **symbols = backtrace_symbols((void *const *) frames, (int) depth); + char line[GRAALPY_NATIVE_STACK_LINE_BUFFER]; + if (symbols == NULL) { + for (size_t i = 0; i < depth; i++) { + snprintf(line, sizeof(line), "frame[%lu]: %p", + (unsigned long) i, (void *) frames[i]); + writer(ctx, line); + } + return; + } + + for (size_t i = 0; i < depth; i++) { + snprintf(line, sizeof(line), "frame[%lu]: %s", (unsigned long) i, symbols[i]); + writer(ctx, line); + } + free(symbols); +} + +#endif + +size_t +GraalPyPrivate_CaptureStacktrace(void **frames, size_t max_depth, size_t skip) +{ + if (frames == NULL || max_depth == 0) { + return 0; + } +#if defined(MS_WINDOWS) + return (size_t) CaptureStackBackTrace((ULONG) (skip + 1), (ULONG) max_depth, frames, NULL); +#elif (defined(__linux__) && defined(__GNU_LIBRARY__)) || defined(__APPLE__) + int raw_depth = backtrace(frames, (int) max_depth); + size_t depth = raw_depth > 0 ? (size_t) raw_depth : 0; + size_t start = depth > (skip + 1) ? 
(skip + 1) : depth; + size_t usable_depth = depth - start; + if (usable_depth > 0) { + memmove(frames, frames + start, usable_depth * sizeof(void *)); + } + return usable_depth; +#else + return 0; +#endif +} + +static void +render_stacktrace(GraalPyStacktraceWriter writer, void *ctx, void *const *frames, size_t depth) +{ + if (depth == 0) { + render_unavailable_stacktrace(writer, ctx); + return; + } +#if defined(MS_WINDOWS) + render_windows_stacktrace(writer, ctx, frames, depth); +#elif (defined(__linux__) && defined(__GNU_LIBRARY__)) || defined(__APPLE__) + render_execinfo_stacktrace(writer, ctx, frames, depth); +#else + (void) frames; + render_unavailable_stacktrace(writer, ctx); +#endif +} + +static void +file_writer(void *ctx, const char *line) +{ + fprintf((FILE *) ctx, "%s\n", line); +} + +void +GraalPyPrivate_PrintCapturedStacktrace(FILE *file, const char *header, void *const *frames, size_t depth) +{ + if (header != NULL) { + fputs(header, file); + } + render_stacktrace(file_writer, file, frames, depth); + fflush(file); +} + +void +GraalPyPrivate_PrintCurrentStacktrace(FILE *file, const char *header, size_t max_depth, size_t skip) +{ + void *frames[64]; + size_t depth = max_depth; + if (depth > (sizeof(frames) / sizeof(frames[0]))) { + depth = sizeof(frames) / sizeof(frames[0]); + } + depth = GraalPyPrivate_CaptureStacktrace(frames, depth, skip + 1); + GraalPyPrivate_PrintCapturedStacktrace(file, header, frames, depth); +} + +typedef struct { + int level; + const char *prefix; +} LogWriterCtx; + +static void +log_writer(void *ctx, const char *line) +{ + LogWriterCtx *log_ctx = (LogWriterCtx *) ctx; + if (log_ctx->prefix != NULL) { + GraalPyPrivate_Log(log_ctx->level, "%s%s\n", log_ctx->prefix, line); + } else { + GraalPyPrivate_Log(log_ctx->level, "%s\n", line); + } +} + +void +GraalPyPrivate_LogCapturedStacktrace(int level, const char *prefix, void *const *frames, size_t depth) +{ + if ((Py_Truffle_Options & level) == 0) { + return; + } + LogWriterCtx 
log_ctx = {level, prefix}; + render_stacktrace(log_writer, &log_ctx, frames, depth); +} diff --git a/graalpython/com.oracle.graal.python.cext/src/obmalloc.c b/graalpython/com.oracle.graal.python.cext/src/obmalloc.c index eb27176f9f..e03c961bac 100644 --- a/graalpython/com.oracle.graal.python.cext/src/obmalloc.c +++ b/graalpython/com.oracle.graal.python.cext/src/obmalloc.c @@ -40,9 +40,6 @@ */ #include "capi.h" #include "pycore_pymem.h" -#ifdef MS_WINDOWS -#include -#endif /* * This header needs to be 16 bytes long to ensure that allocations will still be aligned to 16 byte boundaries. @@ -72,7 +69,6 @@ typedef struct { #define GRAALPY_MEM_HEAD_POISON ((size_t)0xDDDDBAD0DDDDBAD0ULL) #define GRAALPY_MEM_SAMPLE_RING_SIZE (4096) #define GRAALPY_MEM_SAMPLE_HISTORY (8) -#define GRAALPY_MEM_SAMPLE_STACK_DEPTH (12) #define GRAALPY_MEM_SAMPLE_STACK_SKIP (2) #define GRAALPY_MEM_SAMPLE_USEFUL_DEPTH (10) #define MAX_COLLECTION_RETRIES (7) @@ -110,21 +106,8 @@ _GraalPyMem_SampleAllocSitesEnabled(void) static void _GraalPyMem_CaptureSampleStack(GraalPyMemSample_t *sample) { -#if (__linux__ && __GNU_LIBRARY__) - void *frames[GRAALPY_MEM_SAMPLE_STACK_DEPTH]; - int depth = backtrace(frames, GRAALPY_MEM_SAMPLE_STACK_DEPTH); - size_t start = depth > GRAALPY_MEM_SAMPLE_STACK_SKIP ? 
GRAALPY_MEM_SAMPLE_STACK_SKIP : (size_t) depth; - sample->depth = (size_t) depth - start; - if (sample->depth > GRAALPY_MEM_SAMPLE_USEFUL_DEPTH) { - sample->depth = GRAALPY_MEM_SAMPLE_USEFUL_DEPTH; - } - memcpy(sample->stack, frames + start, sample->depth * sizeof(void *)); -#elif defined(MS_WINDOWS) - sample->depth = (size_t) CaptureStackBackTrace(GRAALPY_MEM_SAMPLE_STACK_SKIP, - GRAALPY_MEM_SAMPLE_USEFUL_DEPTH, sample->stack, NULL); -#else - sample->depth = 0; -#endif + sample->depth = GraalPyPrivate_CaptureStacktrace(sample->stack, GRAALPY_MEM_SAMPLE_USEFUL_DEPTH, + GRAALPY_MEM_SAMPLE_STACK_SKIP); } static void @@ -159,14 +142,12 @@ _GraalPyMem_LogRecentSamples(const char *func, void *ptr) size_t index = (next_index + GRAALPY_MEM_SAMPLE_RING_SIZE - offset - 1) % GRAALPY_MEM_SAMPLE_RING_SIZE; const GraalPyMemSample_t *sample = &_GraalPyMem_Samples[index]; if (sample->ptr == ptr && sample->serial != 0) { + char prefix[128]; GraalPyPrivate_Log(PY_TRUFFLE_LOG_INFO, "%s: recent raw memory sample #%llu op=%c ptr=%p size=%lu depth=%lu\n", func, sample->serial, sample->operation, sample->ptr, (unsigned long) sample->size, (unsigned long) sample->depth); - for (size_t frame_index = 0; frame_index < sample->depth; frame_index++) { - GraalPyPrivate_Log(PY_TRUFFLE_LOG_INFO, - "%s: sample #%llu frame[%lu]=%p\n", - func, sample->serial, (unsigned long) frame_index, sample->stack[frame_index]); - } + snprintf(prefix, sizeof(prefix), "%s: sample #%llu ", func, sample->serial); + GraalPyPrivate_LogCapturedStacktrace(PY_TRUFFLE_LOG_INFO, prefix, sample->stack, sample->depth); printed++; } }