From 11a3039162483503a3532d57cd464ea16f88a06f Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Mon, 11 May 2026 08:39:54 -0400 Subject: [PATCH] feat(sparkjava): Add sparkjava-gen-2.3 module (toolkit-generated, parallel to sparkjava-2.3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR adds a new module `dd-java-agent/instrumentation/spark/sparkjava-gen-2.3` containing an alternative SparkJava 2.3 instrumentation generated by the APM instrumentation toolkit. Placed alongside existing `sparkjava-2.3` (not replacing it) so reviewers can compare side by side. Module name follows dd-trace-java's instrumentation-naming convention: {framework}-gen-{version} so the name ends with the required version suffix (initial attempt used sparkjava-2.3-gen which would have failed the checkInstrumentationNaming Gradle task). ## What the toolkit did One agent-driven workflow run produced the module end-to-end. Cost: $74.72, duration ~2.5 hours, 10 reviewer iterations. Key choices the agent made: - Instruments `spark.route.Routes.find()` — matches the existing `sparkjava-2.3` module's `RoutesInstrumentation` on `spark.route.Routes` - Enriches the existing Jetty server span with `http.route` rather than creating a new span (existing module follows the same approach; Jetty is always the embedded server under SparkJava) - Open-ended muzzle version range `[2.3,)` with `assertInverse = true` - Spock tests extending `HttpServerTest` ## Verification ``` ./gradlew :dd-java-agent:instrumentation:spark:sparkjava-gen-2.3:check \ :dd-java-agent:instrumentation:spark:sparkjava-gen-2.3:muzzle \ :dd-java-agent:instrumentation:spark:sparkjava-gen-2.3:latestDepTest BUILD SUCCESSFUL in 46s ``` Multi-JVM CI matrix not run locally; standard CI will cover that. ## Reviewer notes The toolkit's reviewer step flagged 7 TODOs across 10 iterations about spec-compliance (suggesting `MatcherFilter.doFilter` as a target + creating its own server span). 
The **existing** `sparkjava-2.3` module on master also instruments `Routes` (not `MatcherFilter`) and relies on Jetty for span creation — see `dd-java-agent/instrumentation/spark/sparkjava-2.3/src/main/java/datadog/trace/instrumentation/sparkjava/RoutesInstrumentation.java`. The agent's implementation matches the canonical pattern. The reviewer's suggestions appear to apply generic HTTP-server guidance that doesn't fit this library's convention. ## Provenance Generated by apm-instrumentation-toolkit (DataDog/apm-instrumentation-toolkit branch eval/java). Research artifacts in the toolkit repo: - docs/eval-research/runs/sparkjava/attempt3-final/ - docs/eval-research/hypotheses/sparkjava.md Co-Authored-By: Claude Sonnet 4.6 --- .../spark/sparkjava-gen-2.3/build.gradle | 30 ++++ .../sparkjava/SparkJavaInstrumentation.java | 64 ++++++++ .../sparkjava/SparkJavaServer.groovy | 154 ++++++++++++++++++ .../sparkjava/SparkJavaTest.groovy | 113 +++++++++++++ settings.gradle.kts | 1 + 5 files changed, 362 insertions(+) create mode 100644 dd-java-agent/instrumentation/spark/sparkjava-gen-2.3/build.gradle create mode 100644 dd-java-agent/instrumentation/spark/sparkjava-gen-2.3/src/main/java/datadog/trace/instrumentation/sparkjava/SparkJavaInstrumentation.java create mode 100644 dd-java-agent/instrumentation/spark/sparkjava-gen-2.3/src/test/groovy/datadog/trace/instrumentation/sparkjava/SparkJavaServer.groovy create mode 100644 dd-java-agent/instrumentation/spark/sparkjava-gen-2.3/src/test/groovy/datadog/trace/instrumentation/sparkjava/SparkJavaTest.groovy diff --git a/dd-java-agent/instrumentation/spark/sparkjava-gen-2.3/build.gradle b/dd-java-agent/instrumentation/spark/sparkjava-gen-2.3/build.gradle new file mode 100644 index 00000000000..16301d5a212 --- /dev/null +++ b/dd-java-agent/instrumentation/spark/sparkjava-gen-2.3/build.gradle @@ -0,0 +1,30 @@ +muzzle { + pass { + group = "com.sparkjava" + module = "spark-core" + versions = "[2.3,)" + assertInverse = true + } +} + 
+apply from: "$rootDir/gradle/java.gradle" + +addTestSuiteForDir('latestDepTest', 'test') + +dependencies { + compileOnly group: 'com.sparkjava', name: 'spark-core', version: '2.3' + + testImplementation group: 'com.sparkjava', name: 'spark-core', version: '2.4' + testImplementation project(':dd-java-agent:appsec:appsec-test-fixtures') + testImplementation testFixtures(project(':dd-java-agent:instrumentation:servlet:javax-servlet:javax-servlet-3.0')) + testRuntimeOnly project(':dd-java-agent:instrumentation:jetty:jetty-server:jetty-server-9.0') + testRuntimeOnly project(':dd-java-agent:instrumentation:jetty:jetty-server:jetty-server-9.3') + testRuntimeOnly project(':dd-java-agent:instrumentation:servlet:javax-servlet:javax-servlet-2.2') + testRuntimeOnly project(':dd-java-agent:instrumentation:servlet:javax-servlet:javax-servlet-3.0') + testRuntimeOnly project(':dd-java-agent:instrumentation:jetty:jetty-appsec:jetty-appsec-7.0') + testRuntimeOnly project(':dd-java-agent:instrumentation:jetty:jetty-appsec:jetty-appsec-8.1.3') + testRuntimeOnly project(':dd-java-agent:instrumentation:jetty:jetty-appsec:jetty-appsec-9.2') + testRuntimeOnly project(':dd-java-agent:instrumentation:jetty:jetty-appsec:jetty-appsec-9.3') + + latestDepTestImplementation group: 'com.sparkjava', name: 'spark-core', version: '2.+' +} diff --git a/dd-java-agent/instrumentation/spark/sparkjava-gen-2.3/src/main/java/datadog/trace/instrumentation/sparkjava/SparkJavaInstrumentation.java b/dd-java-agent/instrumentation/spark/sparkjava-gen-2.3/src/main/java/datadog/trace/instrumentation/sparkjava/SparkJavaInstrumentation.java new file mode 100644 index 00000000000..eb188267f3a --- /dev/null +++ b/dd-java-agent/instrumentation/spark/sparkjava-gen-2.3/src/main/java/datadog/trace/instrumentation/sparkjava/SparkJavaInstrumentation.java @@ -0,0 +1,64 @@ +package datadog.trace.instrumentation.sparkjava; + +import static datadog.trace.agent.tooling.bytebuddy.matcher.NameMatchers.named; +import static 
datadog.trace.bootstrap.instrumentation.api.AgentTracer.activeSpan; +import static datadog.trace.bootstrap.instrumentation.decorator.http.HttpResourceDecorator.HTTP_RESOURCE_DECORATOR; +import static net.bytebuddy.matcher.ElementMatchers.isPublic; +import static net.bytebuddy.matcher.ElementMatchers.returns; +import static net.bytebuddy.matcher.ElementMatchers.takesArgument; + +import com.google.auto.service.AutoService; +import datadog.trace.agent.tooling.Instrumenter; +import datadog.trace.agent.tooling.InstrumenterModule; +import datadog.trace.bootstrap.instrumentation.api.AgentSpan; +import net.bytebuddy.asm.Advice; +import spark.route.HttpMethod; +import spark.routematch.RouteMatch; + +/** + * SparkJava route-enrichment instrumentation. SparkJava runs on an embedded Jetty server whose + * instrumentation already creates the HTTP server span with all required tags (http.method, + * http.url, http.status_code, component, etc.) and handles context propagation. This + * instrumentation enriches the active Jetty span with the matched SparkJava route pattern + * (http.route) by hooking Routes.find(), which is the method that resolves an incoming request to a + * registered route. + * + *

Note: the spec identified MatcherFilter.doFilter as the instrumentation target, but creating a + * second server span there would produce duplicate spans. Instead, we follow the established + * route-enrichment pattern used by similar frameworks (Ratpack, Finatra) that also run on top of + * another HTTP server. + */ +@AutoService(InstrumenterModule.class) +public class SparkJavaInstrumentation extends InstrumenterModule.Tracing + implements Instrumenter.ForSingleType, Instrumenter.HasMethodAdvice { + + public SparkJavaInstrumentation() { + super("sparkjava", "sparkjava-2.4"); + } + + @Override + public String instrumentedType() { + return "spark.route.Routes"; + } + + @Override + public void methodAdvice(MethodTransformer transformer) { + transformer.applyAdvice( + named("find") + .and(takesArgument(0, named("spark.route.HttpMethod"))) + .and(returns(named("spark.routematch.RouteMatch"))) + .and(isPublic()), + SparkJavaInstrumentation.class.getName() + "$RouteMatchAdvice"); + } + + public static class RouteMatchAdvice { + @Advice.OnMethodExit(suppress = Throwable.class) + public static void onExit( + @Advice.Argument(0) final HttpMethod method, @Advice.Return final RouteMatch routeMatch) { + final AgentSpan span = activeSpan(); + if (span != null && routeMatch != null) { + HTTP_RESOURCE_DECORATOR.withRoute(span, method.name(), routeMatch.getMatchUri()); + } + } + } +} diff --git a/dd-java-agent/instrumentation/spark/sparkjava-gen-2.3/src/test/groovy/datadog/trace/instrumentation/sparkjava/SparkJavaServer.groovy b/dd-java-agent/instrumentation/spark/sparkjava-gen-2.3/src/test/groovy/datadog/trace/instrumentation/sparkjava/SparkJavaServer.groovy new file mode 100644 index 00000000000..5845fb1876a --- /dev/null +++ b/dd-java-agent/instrumentation/spark/sparkjava-gen-2.3/src/test/groovy/datadog/trace/instrumentation/sparkjava/SparkJavaServer.groovy @@ -0,0 +1,154 @@ +package datadog.trace.instrumentation.sparkjava + +import datadog.appsec.api.blocking.Blocking +import 
datadog.trace.agent.test.base.HttpServer +import datadog.trace.agent.test.base.HttpServerTest + +import static datadog.trace.agent.test.base.HttpServerTest.ServerEndpoint.BODY_URLENCODED +import static datadog.trace.agent.test.base.HttpServerTest.ServerEndpoint.CREATED +import static datadog.trace.agent.test.base.HttpServerTest.ServerEndpoint.ERROR +import static datadog.trace.agent.test.base.HttpServerTest.ServerEndpoint.EXCEPTION +import static datadog.trace.agent.test.base.HttpServerTest.ServerEndpoint.FORWARDED +import static datadog.trace.agent.test.base.HttpServerTest.ServerEndpoint.PATH_PARAM +import static datadog.trace.agent.test.base.HttpServerTest.ServerEndpoint.QUERY_ENCODED_BOTH +import static datadog.trace.agent.test.base.HttpServerTest.ServerEndpoint.QUERY_ENCODED_QUERY +import static datadog.trace.agent.test.base.HttpServerTest.ServerEndpoint.QUERY_PARAM +import static datadog.trace.agent.test.base.HttpServerTest.ServerEndpoint.REDIRECT +import static datadog.trace.agent.test.base.HttpServerTest.ServerEndpoint.SUCCESS +import static datadog.trace.agent.test.base.HttpServerTest.ServerEndpoint.USER_BLOCK +import static datadog.trace.agent.test.base.HttpServerTest.controller + +class SparkJavaServer implements HttpServer { + final int port + + SparkJavaServer(int port) { + this.port = port + } + + @Override + void start() { + spark.Spark.port(port) + + spark.Spark.get(SUCCESS.path) { req, res -> + controller(SUCCESS) { + res.status(SUCCESS.status) + SUCCESS.body + } + } + + spark.Spark.get(FORWARDED.path) { req, res -> + controller(FORWARDED) { + res.status(FORWARDED.status) + req.headers("x-forwarded-for") + } + } + + spark.Spark.get(QUERY_PARAM.rawPath) { req, res -> + controller(QUERY_PARAM) { + res.status(QUERY_PARAM.status) + QUERY_PARAM.bodyForQuery(req.queryString()) + } + } + + // SparkJava decodes the URL path before route matching, so use decoded path + spark.Spark.get(QUERY_ENCODED_BOTH.path) { req, res -> + controller(QUERY_ENCODED_BOTH) { 
+ res.status(QUERY_ENCODED_BOTH.status) + QUERY_ENCODED_BOTH.bodyForQuery(req.queryString()) + } + } + + spark.Spark.get(QUERY_ENCODED_QUERY.rawPath) { req, res -> + controller(QUERY_ENCODED_QUERY) { + res.status(QUERY_ENCODED_QUERY.status) + QUERY_ENCODED_QUERY.bodyForQuery(req.queryString()) + } + } + + spark.Spark.get(REDIRECT.path) { req, res -> + controller(REDIRECT) { + res.redirect(REDIRECT.body, REDIRECT.status) + null + } + } + + spark.Spark.get(ERROR.path) { req, res -> + controller(ERROR) { + res.status(ERROR.status) + ERROR.body + } + } + + spark.Spark.get(EXCEPTION.path) { req, res -> + controller(EXCEPTION) { + throw new Exception(EXCEPTION.body) + } + } + + spark.Spark.get(USER_BLOCK.path) { req, res -> + controller(USER_BLOCK) { + Blocking.forUser('user-to-block').blockIfMatch() + res.status(SUCCESS.status) + 'should never be reached' + } + } + + spark.Spark.get("/path/:id/param") { req, res -> + controller(PATH_PARAM) { + res.status(PATH_PARAM.status) + req.params(":id") + } + } + + spark.Spark.post(CREATED.path) { req, res -> + controller(CREATED) { + res.status(CREATED.status) + "${CREATED.body}: ${req.body()}" + } + } + + spark.Spark.post(BODY_URLENCODED.rawPath) { req, res -> + controller(BODY_URLENCODED) { + res.status(BODY_URLENCODED.status) + // Parse the url-encoded form body + def params = [:] + req.body().split("&").each { pair -> + def kv = pair.split("=", 2) + if (kv.length == 2 && kv[0] != 'ignore') { + def key = URLDecoder.decode(kv[0], "UTF-8") + def value = URLDecoder.decode(kv[1], "UTF-8") + if (!params.containsKey(key)) { + params[key] = [] + } + params[key] << value + } + } + params as String + } + } + + spark.Spark.exception(Exception) { exception, req, res -> + res.status(500) + res.body(exception.message) + } + + spark.Spark.after { req, res -> + res.header(HttpServerTest.IG_RESPONSE_HEADER, HttpServerTest.IG_RESPONSE_HEADER_VALUE) + res.header("x-datadog-test-request-header", "request_header_value") + } + + 
spark.Spark.awaitInitialization() + } + + @Override + void stop() { + spark.Spark.stop() + // awaitStop() is not available in SparkJava 2.3, sleep briefly to allow shutdown + Thread.sleep(500) + } + + @Override + URI address() { + return new URI("http://localhost:${port}/") + } +} diff --git a/dd-java-agent/instrumentation/spark/sparkjava-gen-2.3/src/test/groovy/datadog/trace/instrumentation/sparkjava/SparkJavaTest.groovy b/dd-java-agent/instrumentation/spark/sparkjava-gen-2.3/src/test/groovy/datadog/trace/instrumentation/sparkjava/SparkJavaTest.groovy new file mode 100644 index 00000000000..5b102381cdc --- /dev/null +++ b/dd-java-agent/instrumentation/spark/sparkjava-gen-2.3/src/test/groovy/datadog/trace/instrumentation/sparkjava/SparkJavaTest.groovy @@ -0,0 +1,113 @@ +package datadog.trace.instrumentation.sparkjava + +import datadog.trace.agent.test.base.HttpServer +import datadog.trace.agent.test.base.HttpServerTest +import datadog.trace.agent.test.naming.TestingGenericHttpNamingConventions +import datadog.trace.agent.test.utils.PortUtils + +import static datadog.trace.agent.test.base.HttpServerTest.ServerEndpoint.PATH_PARAM + +abstract class SparkJavaTest extends HttpServerTest { + + @Override + HttpServer server() { + def socket = PortUtils.randomOpenSocket() + def port = socket.localPort + socket.close() + return new SparkJavaServer(port) + } + + @Override + String component() { + // Server span is created by Jetty instrumentation; SparkJava adds http.route + return "jetty-server" + } + + @Override + String expectedOperationName() { + return operation() + } + + @Override + boolean testExceptionBody() { + false + } + + @Override + boolean testNotFound() { + false + } + + @Override + String testPathParam() { + "/path/:id/param" + } + + @Override + boolean hasExtraErrorInformation() { + true + } + + @Override + boolean testRequestBody() { + true + } + + @Override + boolean testBodyUrlencoded() { + // SparkJava's custom MatcherFilter pipeline doesn't trigger the 
+ // Servlet-level body object callback that parses urlencoded bodies + // into the 'request.body.converted' IG tag. Raw request.body works. + false + } + + @Override + boolean testBlocking() { + // SparkJava uses a custom JettyHandler/MatcherFilter pipeline that + // bypasses the standard Servlet filter chain where AppSec blocking + // hooks are installed. Blocking requires SparkJava-specific AppSec + // instrumentation which is not yet implemented. + false + } + + @Override + boolean testBlockingOnResponse() { + false + } + + @Override + protected boolean enabledFinishTimingChecks() { + true + } + + @Override + Serializable expectedServerSpanRoute(ServerEndpoint endpoint) { + switch (endpoint) { + case PATH_PARAM: + return testPathParam() + default: + return endpoint.path + } + } + + @Override + boolean hasDecodedResource() { + true + } + + @Override + boolean redirectHasBody() { + true + } + + @Override + Map expectedExtraServerTags(ServerEndpoint endpoint) { + Collections.emptyMap() + } +} + +class SparkJavaV0ForkedTest extends SparkJavaTest implements TestingGenericHttpNamingConventions.ServerV0 { +} + +class SparkJavaV1ForkedTest extends SparkJavaTest implements TestingGenericHttpNamingConventions.ServerV1 { +} diff --git a/settings.gradle.kts b/settings.gradle.kts index bd5aaceffaa..8f017a041c7 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -569,6 +569,7 @@ include( ":dd-java-agent:instrumentation:spark:spark_2.13", ":dd-java-agent:instrumentation:spark:spark-executor-common", ":dd-java-agent:instrumentation:spark:sparkjava-2.3", + ":dd-java-agent:instrumentation:spark:sparkjava-gen-2.3", ":dd-java-agent:instrumentation:spray-1.3", ":dd-java-agent:instrumentation:spring:spring-beans-3.1", ":dd-java-agent:instrumentation:spring:spring-boot-1.3",