Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import datadog.context.ContextScope;
import datadog.trace.agent.tooling.Instrumenter;
import datadog.trace.agent.tooling.InstrumenterModule;
import datadog.trace.bootstrap.CallDepthThreadLocalMap;
import datadog.trace.bootstrap.InstrumentationContext;
import datadog.trace.bootstrap.instrumentation.api.AgentSpan;
import datadog.trace.bootstrap.instrumentation.api.AgentSpanContext;
Expand Down Expand Up @@ -100,6 +101,11 @@ public static ContextScope start(
// Don't want to generate a new top-level span
return null;
}

final int depth = CallDepthThreadLocalMap.incrementCallDepth(RequestDispatcher.class);
if (depth > 0) {
return null;
}
final AgentSpanContext parent;
if (servletSpan == null || (parentSpan != null && servletSpan.isSameTrace(parentSpan))) {
// Use the parentSpan if the servletSpan is null or part of the same trace.
Expand Down Expand Up @@ -147,6 +153,7 @@ public static void stop(
if (scope == null) {
return;
}
CallDepthThreadLocalMap.reset(RequestDispatcher.class);

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Keep the call-depth guard active through context restore

In the normal servlet dispatch path requestContext is non-null because the servlet advice stores DD_CONTEXT_ATTRIBUTE on the request, so resetting the call-depth guard before request.setAttribute(DD_CONTEXT_ATTRIBUTE, requestContext) reopens the same recursion window on exit. With wrappers that redispatch from setAttribute() (the production scenario this fixes), the restore call now starts another fully instrumented forward() while the dispatcher span is still active, which can repeat from each nested exit and still overflow; keep the guard set until after all advice-driven setAttribute calls have completed.

Useful? React with 👍 / 👎.


if (requestContext != null) {
request.setAttribute(DD_CONTEXT_ATTRIBUTE, requestContext);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
import static datadog.trace.agent.test.utils.TraceUtils.runUnderTrace;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import datadog.trace.agent.test.AbstractInstrumentationTest;
import java.io.IOException;
import java.lang.reflect.Proxy;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import javax.servlet.RequestDispatcher;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletRequestWrapper;
import javax.servlet.http.HttpServletResponse;
import org.junit.jupiter.api.Test;

/**
* Regression test for the SAP Hybris/Spartacus StackOverflow reported in v1.63.0.
*
* <p>Root cause: {@code RequestDispatcherAdvice.start()} calls {@code injectContext()} which calls
* {@code request.setAttribute()} for each propagation header. In some request wrappers (Hybris
* internals, SAP Commerce), {@code setAttribute()} internally triggers another {@code
* RequestDispatcher.forward()} call. Without a re-entrancy guard the advice recurses until {@code
* StackOverflowError}, visible in production as many "Failed to handle exception in instrumentation
* for ApplicationDispatcher" log entries.
*
* <p>The fix is a {@code CallDepthThreadLocalMap} guard in {@code RequestDispatcherAdvice.start()}
* analogous to the one already present in {@code AsyncContextInstrumentation}.
*/
class RequestDispatcherRecursionTest extends AbstractInstrumentationTest {

/** Minimal {@code RequestDispatcher} stub — instrumented by ByteBuddy via interface match. */
static class TestDispatcher implements RequestDispatcher {
@Override
public void forward(ServletRequest req, ServletResponse resp)
throws ServletException, IOException {}

@Override
public void include(ServletRequest req, ServletResponse resp)
throws ServletException, IOException {}
}

/**
* {@code RequestDispatcher} with a non-trivial body simulating {@code ApplicationDispatcher}
* (Tomcat / SAP Commerce), which traverses a filter and valve chain adding significant call
* depth.
*/
static class RealisticDispatcher implements RequestDispatcher {
@Override
public void forward(ServletRequest req, ServletResponse resp)
throws ServletException, IOException {
simulateFilterChain(200);
}

@Override
public void include(ServletRequest req, ServletResponse resp)
throws ServletException, IOException {}

private static void simulateFilterChain(int depth) {
if (depth > 0) simulateFilterChain(depth - 1);
}
}

/** Creates a proxy stub for {@code iface} where all methods return null / 0 / false. */
@SuppressWarnings("unchecked")
static <T> T nullStub(Class<T> iface) {
return (T)
Proxy.newProxyInstance(
iface.getClassLoader(),
new Class<?>[] {iface},
(proxy, method, args) -> {
Class<?> ret = method.getReturnType();
if (ret == boolean.class) return false;
if (ret == int.class || ret == long.class) return 0;
return null;
});
}

@Test
void forward_doesNotRecurse_whenSetAttributeTriggersRedispatch() throws Exception {
TestDispatcher dispatcher = new TestDispatcher();
HttpServletResponse response = nullStub(HttpServletResponse.class);

AtomicInteger currentDepth = new AtomicInteger(0);
AtomicInteger maxDepth = new AtomicInteger(0);

// Hybris-style wrapper: setAttribute() re-triggers a dispatch. Capped at depth 3
// to bound total calls (≈ N^3, N ≈ 5 headers) and avoid OOM without the fix.
final int MAX_SETATTRIBUTE_DEPTH = 3;
HttpServletRequest recursiveRequest =
new HttpServletRequestWrapper(nullStub(HttpServletRequest.class)) {
@Override
public void setAttribute(String name, Object value) {
int depth = currentDepth.incrementAndGet();
maxDepth.updateAndGet(d -> Math.max(d, depth));
try {
super.setAttribute(name, value);
if (depth < MAX_SETATTRIBUTE_DEPTH) {
dispatcher.forward(this, response);
}
} catch (Exception | StackOverflowError ignored) {
} finally {
currentDepth.decrementAndGet();
}
}
};

// runUnderTrace provides an active span; without one the advice exits before injectContext().
runUnderTrace(
"test",
() -> {
dispatcher.forward(recursiveRequest, response);
return null;
});

assertTrue(
maxDepth.get() >= 1,
"advice did not call setAttribute() — check that TestDispatcher is instrumented"
+ " by ByteBuddy and that runUnderTrace creates an active span");

assertEquals(
1,
maxDepth.get(),
"setAttribute() was called recursively to depth "
+ maxDepth.get()
+ ". RequestDispatcherAdvice.start() needs a CallDepthThreadLocalMap guard "
+ "(see AsyncContextInstrumentation for the pattern).");
}

/**
* Regression test for the production SOE scenario with a realistic dispatcher body.
*
* <p>Without the fix, the recursive {@code setAttribute()} pattern in Hybris would fill the call
* stack, and the extra frames from {@code simulateFilterChain()} would tip it into a {@code
* StackOverflowError} from the method <em>body</em> — captured by {@code @Advice.Thrown} → {@code
* DECORATE.onError()} → {@code span.setTag("error.stack")}.
*
* <p>With the {@code CallDepthThreadLocalMap} fix, re-entrant {@code forward()} calls return
* immediately from the enter advice (depth &gt; 0), so the stack never saturates and no SOE is
* thrown. The test verifies exactly this: {@code capturedSoe} must be {@code null}.
*/
@Test
void forward_noSoe_withRealisticDispatcherBodyAndSetAttributeRecursion() throws Exception {
AtomicReference<StackOverflowError> capturedSoe = new AtomicReference<>();
AtomicBoolean soeSeen = new AtomicBoolean(false); // stops re-dispatching once SOE is caught
AtomicBoolean setAttributeInvoked = new AtomicBoolean(false); // guards against false-positive

RealisticDispatcher dispatcher = new RealisticDispatcher();
HttpServletResponse response = nullStub(HttpServletResponse.class);

// Hybris-style wrapper: setAttribute() triggers a new forward(); no depth cap.
HttpServletRequest recursiveRequest =
new HttpServletRequestWrapper(nullStub(HttpServletRequest.class)) {
@Override
public void setAttribute(String name, Object value) {
super.setAttribute(name, value);
setAttributeInvoked.set(true);
if (soeSeen.get()) return; // already captured — stop recursing
try {
dispatcher.forward(this, response);
} catch (StackOverflowError soe) {
capturedSoe.compareAndSet(null, soe);
soeSeen.set(true);
throw soe;
} catch (Exception ignored) {
}
}
};

try {
runUnderTrace(
"test",
() -> {
dispatcher.forward(recursiveRequest, response);
return null;
});
} catch (StackOverflowError ignored) {
// SOE may reach this level if it is not fully absorbed at inner levels.
}

assertTrue(
setAttributeInvoked.get(),
"advice did not call setAttribute() — check that RealisticDispatcher is instrumented"
+ " by ByteBuddy and that runUnderTrace creates an active span");

assertNull(
capturedSoe.get(),
"StackOverflowError was thrown — fix regression: CallDepthThreadLocalMap guard "
+ "was removed from RequestDispatcherAdvice.start()");
}
}
Loading