Skip to content

Commit 40ca6a7

Browse files
google-genai-bot and copybara-github
authored and committed
feat: enabling output_schema and tools to coexist
This CL enables the simultaneous use of `output_schema` (structured output) and `tools` for models that do not natively support both features at once (specifically Gemini 1.x and 2.x on Vertex AI).

### Core Logic

The CL implements a workaround for models with this limitation:

1. **Synthetic Tooling**: Instead of passing the `output_schema` directly to the model's configuration, it introduces a synthetic tool called `set_model_response`.
2. **Schema Injection**: The parameters of this tool are set to the requested `output_schema`.
3. **Instruction Prompting**: System instructions are appended, directing the model to provide its final response using this specific tool in the required format.
4. **Response Interception**: The `BaseLlmFlow` is updated to check if `set_model_response` was called. If so, it extracts the JSON arguments and converts them into a standard model response event.

### Key Changes

* **`OutputSchema.java` (New)**: A new `RequestProcessor` that detects when the workaround is needed, adds the `SetModelResponseTool`, and provides utilities for extracting the structured response.
* **`SetModelResponseTool.java` (New)**: A marker tool that simply returns its input arguments, used to "capture" the structured output from the model.
* **`ModelNameUtils.java`**: Added logic to identify Gemini 1.x and 2.x models and determine if they can handle native `output_schema` alongside tools.
* **`BaseLlmFlow.java`**: Updated the flow logic to detect the synthetic tool response and generate the final output event.
* **`Basic.java`**: Updated to prevent native `outputSchema` configuration when the workaround is active.
* **`SingleFlow.java`**: Registered the new `OutputSchema` processor.

PiperOrigin-RevId: 886769688
1 parent 8ea81f7 commit 40ca6a7

7 files changed

Lines changed: 410 additions & 5 deletions

File tree

core/src/main/java/com/google/adk/flows/llmflows/BaseLlmFlow.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -692,9 +692,14 @@ private Flowable<Event> buildPostprocessingEvents(
692692
Optional<Event> toolConfirmationEvent =
693693
Functions.generateRequestConfirmationEvent(
694694
context, modelResponseEvent, functionResponseEvent);
695-
return toolConfirmationEvent.isPresent()
696-
? Flowable.just(toolConfirmationEvent.get(), functionResponseEvent)
697-
: Flowable.just(functionResponseEvent);
695+
List<Event> events = new ArrayList<>();
696+
toolConfirmationEvent.ifPresent(events::add);
697+
events.add(functionResponseEvent);
698+
OutputSchema.getStructuredModelResponse(functionResponseEvent)
699+
.ifPresent(
700+
json ->
701+
events.add(OutputSchema.createFinalModelResponseEvent(context, json)));
702+
return Flowable.fromIterable(events);
698703
});
699704
}
700705

core/src/main/java/com/google/adk/flows/llmflows/Basic.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import com.google.adk.agents.InvocationContext;
2020
import com.google.adk.agents.LlmAgent;
2121
import com.google.adk.models.LlmRequest;
22+
import com.google.adk.utils.ModelNameUtils;
2223
import com.google.common.collect.ImmutableList;
2324
import com.google.genai.types.GenerateContentConfig;
2425
import com.google.genai.types.LiveConnectConfig;
@@ -60,7 +61,15 @@ public Single<RequestProcessor.RequestProcessingResult> processRequest(
6061
.orElseGet(() -> GenerateContentConfig.builder().build()))
6162
.liveConnectConfig(liveConnectConfigBuilder.build());
6263

63-
agent.outputSchema().ifPresent(builder::outputSchema);
64+
agent
65+
.outputSchema()
66+
.ifPresent(
67+
outputSchema -> {
68+
if (agent.toolsUnion().isEmpty()
69+
|| ModelNameUtils.canUseOutputSchemaWithTools(modelName)) {
70+
builder.outputSchema(outputSchema);
71+
}
72+
});
6473
return Single.just(
6574
RequestProcessor.RequestProcessingResult.create(builder.build(), ImmutableList.of()));
6675
}
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/*
2+
* Copyright 2026 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.google.adk.flows.llmflows;
18+
19+
import com.fasterxml.jackson.core.JsonProcessingException;
20+
import com.google.adk.JsonBaseModel;
21+
import com.google.adk.agents.InvocationContext;
22+
import com.google.adk.agents.LlmAgent;
23+
import com.google.adk.events.Event;
24+
import com.google.adk.models.LlmRequest;
25+
import com.google.adk.tools.SetModelResponseTool;
26+
import com.google.adk.tools.ToolContext;
27+
import com.google.adk.utils.ModelNameUtils;
28+
import com.google.common.collect.ImmutableList;
29+
import com.google.genai.types.Content;
30+
import com.google.genai.types.FunctionResponse;
31+
import com.google.genai.types.Part;
32+
import io.reactivex.rxjava3.core.Single;
33+
import java.util.Objects;
34+
import java.util.Optional;
35+
import org.slf4j.Logger;
36+
import org.slf4j.LoggerFactory;
37+
38+
/** Processor that handles output schema for agents with tools. */
39+
public final class OutputSchema implements RequestProcessor {
40+
41+
private static final Logger logger = LoggerFactory.getLogger(OutputSchema.class);
42+
43+
public OutputSchema() {}
44+
45+
@Override
46+
public Single<RequestProcessingResult> processRequest(
47+
InvocationContext context, LlmRequest request) {
48+
if (!(context.agent() instanceof LlmAgent)) {
49+
return Single.just(RequestProcessingResult.create(request, ImmutableList.of()));
50+
}
51+
LlmAgent agent = (LlmAgent) context.agent();
52+
String modelName = request.model().orElse("");
53+
54+
if (agent.outputSchema().isEmpty()
55+
|| agent.toolsUnion().isEmpty()
56+
|| ModelNameUtils.canUseOutputSchemaWithTools(modelName)) {
57+
return Single.just(RequestProcessingResult.create(request, ImmutableList.of()));
58+
}
59+
60+
// Add the set_model_response tool to handle structured output
61+
SetModelResponseTool setResponseTool = new SetModelResponseTool(agent.outputSchema().get());
62+
LlmRequest.Builder builder = request.toBuilder();
63+
64+
return setResponseTool
65+
.processLlmRequest(builder, ToolContext.builder(context).build())
66+
.andThen(
67+
Single.fromCallable(
68+
() -> {
69+
builder.appendInstructions(
70+
ImmutableList.of(
71+
"IMPORTANT: You have access to other tools, but you must provide your"
72+
+ " final response using the set_model_response tool with the"
73+
+ " required structured format. After using any other tools needed"
74+
+ " to complete the task, always call set_model_response with your"
75+
+ " final answer in the specified schema format."));
76+
return RequestProcessingResult.create(builder.build(), ImmutableList.of());
77+
}));
78+
}
79+
80+
/**
81+
* Check if function response contains set_model_response and extract JSON.
82+
*
83+
* @param functionResponseEvent The function response event to check.
84+
* @return JSON response string if set_model_response was called, Optional.empty() otherwise.
85+
*/
86+
public static Optional<String> getStructuredModelResponse(Event functionResponseEvent) {
87+
for (FunctionResponse funcResponse : functionResponseEvent.functionResponses()) {
88+
if (Objects.equals(funcResponse.name().orElse(""), SetModelResponseTool.NAME)) {
89+
Object response = funcResponse.response();
90+
// The tool returns the args map directly.
91+
try {
92+
return Optional.of(JsonBaseModel.getMapper().writeValueAsString(response));
93+
} catch (JsonProcessingException e) {
94+
logger.error("Failed to serialize set_model_response result", e);
95+
return Optional.empty();
96+
}
97+
}
98+
}
99+
return Optional.empty();
100+
}
101+
102+
/**
103+
* Create a final model response event from set_model_response JSON.
104+
*
105+
* @param context The invocation context.
106+
* @param jsonResponse The JSON response from set_model_response tool.
107+
* @return A new Event that looks like a normal model response.
108+
*/
109+
public static Event createFinalModelResponseEvent(
110+
InvocationContext context, String jsonResponse) {
111+
return Event.builder()
112+
.id(Event.generateEventId())
113+
.invocationId(context.invocationId())
114+
.author(context.agent().name())
115+
.branch(context.branch().orElse(null))
116+
.content(Content.builder().role("model").parts(Part.fromText(jsonResponse)).build())
117+
.build();
118+
}
119+
}

core/src/main/java/com/google/adk/flows/llmflows/SingleFlow.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ public class SingleFlow extends BaseLlmFlow {
2727
protected static final ImmutableList<RequestProcessor> REQUEST_PROCESSORS =
2828
ImmutableList.of(
2929
new Basic(),
30+
new OutputSchema(),
3031
new RequestConfirmationLlmRequestProcessor(),
3132
new Instructions(),
3233
new Identity(),
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/*
2+
* Copyright 2026 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.google.adk.tools;
18+
19+
import com.google.genai.types.FunctionDeclaration;
20+
import com.google.genai.types.Schema;
21+
import io.reactivex.rxjava3.core.Single;
22+
import java.util.Map;
23+
import java.util.Optional;
24+
import javax.annotation.Nonnull;
25+
26+
/**
27+
* Internal tool used for output schema workaround.
28+
*
29+
* <p>This tool allows the model to set its final response when output_schema is configured
30+
* alongside other tools. The model should use this tool to provide its final structured response
31+
* instead of outputting text directly.
32+
*/
33+
public class SetModelResponseTool extends BaseTool {
34+
public static final String NAME = "set_model_response";
35+
36+
private final Schema outputSchema;
37+
38+
public SetModelResponseTool(@Nonnull Schema outputSchema) {
39+
super(
40+
NAME,
41+
"Set your final response using the required output schema. "
42+
+ "After using any other tools needed to complete the task, always call"
43+
+ " set_model_response with your final answer in the specified schema format.");
44+
this.outputSchema = outputSchema;
45+
}
46+
47+
@Override
48+
public Optional<FunctionDeclaration> declaration() {
49+
return Optional.of(
50+
FunctionDeclaration.builder()
51+
.name(name())
52+
.description(description())
53+
.parameters(outputSchema)
54+
.build());
55+
}
56+
57+
@Override
58+
public Single<Map<String, Object>> runAsync(Map<String, Object> args, ToolContext toolContext) {
59+
// This tool is a marker for the final response, it doesn't do anything but return its arguments
60+
// which will be captured as the final result.
61+
return Single.just(args);
62+
}
63+
}

core/src/main/java/com/google/adk/utils/ModelNameUtils.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.util.regex.Matcher;
2222
import java.util.regex.Pattern;
2323

24+
/** Utility class for model names. */
2425
public final class ModelNameUtils {
2526
private static final String GEMINI_PREFIX = "gemini-";
2627
private static final Pattern GEMINI_2_PATTERN = Pattern.compile("^gemini-2\\..*");
@@ -35,11 +36,15 @@ public static boolean isGeminiModel(String modelString) {
3536
}
3637

3738
public static boolean isGemini2Model(String modelString) {
39+
return matchesModelPattern(modelString, GEMINI_2_PATTERN);
40+
}
41+
42+
private static boolean matchesModelPattern(String modelString, Pattern pattern) {
3843
if (modelString == null) {
3944
return false;
4045
}
4146
String modelName = extractModelName(modelString);
42-
return GEMINI_2_PATTERN.matcher(modelName).matches();
47+
return pattern.matcher(modelName).matches();
4348
}
4449

4550
/**
@@ -65,6 +70,17 @@ public static boolean isInstanceOfGemini(Object o) {
6570
return false;
6671
}
6772

73+
/**
74+
* Returns true if the model supports using output schema together with tools.
75+
*
76+
* @param modelString The model name or path.
77+
* @return true if output schema with tools is supported, false otherwise.
78+
*/
79+
public static boolean canUseOutputSchemaWithTools(String modelString) {
80+
// Current limitation for Vertex AI 2.x models.
81+
return !isGemini2Model(modelString);
82+
}
83+
6884
/**
6985
* Extract the actual model name from either simple or path-based format.
7086
*

0 commit comments

Comments
 (0)