Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 61 additions & 16 deletions openhands-sdk/openhands/sdk/llm/mixins/fn_call_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,8 @@ def get_example_for_tools(tools: list[ChatCompletionToolParam]) -> str:
""" # noqa: E501

# Regex patterns for function call parsing
FN_REGEX_PATTERN = r"<function=([^>]+)>\n(.*?)</function>"
# Note: newline after function name is optional for compatibility with various models
FN_REGEX_PATTERN = r"<function=([^>]+)>\n?(.*?)</function>"
FN_PARAM_REGEX_PATTERN = r"<parameter=([^>]+)>(.*?)</parameter>"

# Add new regex pattern for tool execution results
Expand Down Expand Up @@ -702,7 +703,7 @@ def convert_fncall_messages_to_non_fncall_messages(
first_user_message_encountered = False
for message in messages:
role = message["role"]
content: Content = message["content"]
content: Content = message.get("content") or ""

# 1. SYSTEM MESSAGES
# append system prompt suffix to content
Expand Down Expand Up @@ -880,6 +881,9 @@ def _extract_and_validate_params(
for param_match in param_matches:
param_name = param_match.group(1)
param_value = param_match.group(2)
# Normalize whitespace: some models add extra newlines around values
if isinstance(param_value, str):
param_value = param_value.strip()

# Validate parameter is allowed
if allowed_params and param_name not in allowed_params:
Expand Down Expand Up @@ -927,20 +931,43 @@ def _extract_and_validate_params(
found_params.add(param_name)

# Check all required parameters are present
missing_params = required_params - found_params
# Note: security_risk is excluded here because its validation happens later
# in Agent._extract_security_risk(), which has context about whether a security
# analyzer is configured. This allows weaker models to omit it when no analyzer
# is active, while still enforcing it for stronger models with LLMSecurityAnalyzer.
missing_params = required_params - found_params - {"security_risk"}
if missing_params:
raise FunctionCallValidationError(
f"Missing required parameters for function '{fn_name}': {missing_params}"
)
return params


def _preprocess_model_output(content: str) -> str:
"""Clean up model-specific formatting before parsing function calls.

Removes wrapper tags that some models (like Nemotron) emit around function calls:
- </think> before the function call
- <tool_call>...</tool_call> around the function call

Only strips tags at boundaries, not inside parameter values.
"""
# Strip </think> when it appears before <function= (Nemotron reasoning end)
content = re.sub(r"</think>\s*(?=<function=)", "", content)
# Strip <tool_call> when it appears right before <function=
content = re.sub(r"<tool_call>\s*(?=<function=)", "", content)
# Strip </tool_call> when it appears right after </function>
content = re.sub(r"(?<=</function>)\s*</tool_call>", "", content)
return content


def _fix_stopword(content: str) -> str:
"""Fix the issue when some LLM would NOT return the stopword."""
content = _preprocess_model_output(content)
if "<function=" in content and content.count("<function=") == 1:
if content.endswith("</"):
content = content.rstrip() + "function>"
else:
elif not content.rstrip().endswith("</function>"):
content = content + "\n</function>"
return content

Expand Down Expand Up @@ -981,8 +1008,8 @@ def convert_non_fncall_messages_to_fncall_messages(

first_user_message_encountered = False
for message in messages:
role, content = message["role"], message["content"]
content = content or "" # handle cases where content is None
role = message["role"]
content = message.get("content") or ""
# For system messages, remove the added suffix
if role == "system":
if isinstance(content, str):
Expand Down Expand Up @@ -1124,15 +1151,32 @@ def convert_non_fncall_messages_to_fncall_messages(
if fn_match:
fn_name = fn_match.group(1)
fn_body = _normalize_parameter_tags(fn_match.group(2))
matching_tool: ChatCompletionToolParamFunctionChunk | None = next(
(
tool["function"]
for tool in tools
if tool["type"] == "function"
and tool["function"]["name"] == fn_name
),
None,
)

def _find_tool(
name: str,
) -> ChatCompletionToolParamFunctionChunk | None:
return next(
(
tool["function"]
for tool in tools
if tool["type"] == "function"
and tool["function"]["name"] == name
),
None,
)

matching_tool = _find_tool(fn_name)
# Try aliases if tool not found (some models use legacy names)
if not matching_tool:
TOOL_NAME_ALIASES = {
"str_replace_editor": "file_editor",
"bash": "terminal",
"execute_bash": "terminal",
"str_replace": "file_editor",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This... is this really Nemotron doing it? Or could it have been due to an outdated prompt or anything in the context? No, it probably isn't prompting... 🤔

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah i think so, nemotron is really overfitted to execute_bash in my experience 😓

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, it's definitely Nemotron being I guess Nemotron 😅

We ran into this while testing - even when the prompt clearly says the tools are terminal and file_editor, it keeps outputting execute_bash and str_replace_editor. can't help but think it may have been trained on older OpenHands data with those names baked in pretty hard.

The alias fix is just a fallback though - we try exact match first, so it won't mess with models that already get the names right.

Also hit a few other quirks: it skips the newline after <function=...>, wraps stuff in <tool_call> tags, and sometimes throws in extra whitespace. Small stuff but enough to break the parser.

Oh and we skip ICL examples for it too - similar to what's already done for devstral. Context window issues.

LMK if all looks ok.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can't help but think it may have been trained on older OpenHands data with those names baked in pretty hard

🫠

My thought too 🤷‍♂️

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I don't think Nemotron-3-Nano-30B-A3B has enough chops, Here are all the trajectories and metrics:
https://huggingface.co/datasets/pankajmathur/nemotron-nano-swebench-verified-traj

Metric Value
Total Instances 500
Submitted 500 (100%)
Evaluated 355 (71%)
Resolved 82 (23.1%)
Empty Patches 145 (29%)

}
if fn_name in TOOL_NAME_ALIASES:
fn_name = TOOL_NAME_ALIASES[fn_name]
matching_tool = _find_tool(fn_name)
# Validate function exists in tools
if not matching_tool:
available_tools = [
Expand Down Expand Up @@ -1203,7 +1247,8 @@ def convert_from_multiple_tool_calls_to_single_tool_call_messages(
for message in messages:
role: str
content: Content
role, content = message["role"], message["content"]
role = message["role"]
content = message.get("content") or ""
if role == "assistant":
if message.get("tool_calls") and len(message["tool_calls"]) > 1:
# handle multiple tool calls by breaking them into multiple messages
Expand Down
6 changes: 5 additions & 1 deletion openhands-sdk/openhands/sdk/llm/mixins/non_native_fc.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,11 @@ def pre_request_prompt_mock(
kwargs: dict,
) -> tuple[list[dict], dict]:
"""Convert to non-fncall prompting when native tool-calling is off."""
add_iclex = not any(s in self.model for s in ("openhands-lm", "devstral"))
# Skip in-context learning examples for models that understand the format
# or have limited context windows
add_iclex = not any(
s in self.model for s in ("openhands-lm", "devstral", "nemotron")
)
messages = convert_fncall_messages_to_non_fncall_messages(
messages, tools, add_in_context_learning_example=add_iclex
)
Expand Down
Loading