Skip to content

Commit 7d6d905

Browse files
committed
fixes
1 parent 9f6aa7b commit 7d6d905

File tree

3 files changed

+30
-4
lines changed

3 files changed

+30
-4
lines changed

.github/workflows/streaming_compliance.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
name: Streaming Compliance Benchmark
22

33
on:
4+
push:
45
workflow_dispatch:
56
inputs:
67
model:

eval_protocol/benchmarks/test_glm_streaming_compliance.py

Lines changed: 22 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,5 @@
11
"""Benchmarks for GLM streaming regressions (structured output + tool calls)."""
22

3-
from __future__ import annotations
4-
53
import json
64
from typing import Any
75

@@ -145,6 +143,17 @@ def _safe_json_loads(payload: str) -> Any | None:
145143
def test_glm_streaming_structured_output(row: EvaluationRow) -> EvaluationRow:
146144
"""Ensure structured output arrives in assistant content when streaming."""
147145

146+
import os
147+
148+
print(
149+
"DEBUG completion params",
150+
os.environ.get("EP_COMPLETION_PARAMS_JSON"),
151+
"key len",
152+
len(os.environ.get("FIREWORKS_API_KEY", "")),
153+
"acct",
154+
os.environ.get("FIREWORKS_ACCOUNT_ID"),
155+
)
156+
148157
assistant_msg = row.last_assistant_message()
149158
if assistant_msg is None:
150159
row.evaluation_result = EvaluateResult(
@@ -248,6 +257,17 @@ def test_glm_streaming_structured_output(row: EvaluationRow) -> EvaluationRow:
248257
def test_glm_streaming_tool_call(row: EvaluationRow) -> EvaluationRow:
249258
"""Ensure streaming tool calls settle with finish_reason=tool_calls and a single call."""
250259

260+
import os
261+
262+
print(
263+
"DEBUG completion params",
264+
os.environ.get("EP_COMPLETION_PARAMS_JSON"),
265+
"key len",
266+
len(os.environ.get("FIREWORKS_API_KEY", "")),
267+
"acct",
268+
os.environ.get("FIREWORKS_ACCOUNT_ID"),
269+
)
270+
251271
assistant_msg = row.last_assistant_message()
252272
if assistant_msg is None:
253273
row.evaluation_result = EvaluateResult(

eval_protocol/pytest/default_single_turn_rollout_process.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -98,8 +98,12 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
9898

9999
finish_reason = getattr(response.choices[0], "finish_reason", None)
100100

101-
assistant_content = response.choices[0].message.content or ""
102-
tool_calls = response.choices[0].message.tool_calls if response.choices[0].message.tool_calls else None
101+
assistant_message = response.choices[0].message
102+
assistant_content = assistant_message.content or ""
103+
reasoning_content = getattr(assistant_message, "reasoning_content", None)
104+
if reasoning_content is None:
105+
reasoning_content = getattr(assistant_message, "reasoning", None)
106+
tool_calls = assistant_message.tool_calls if assistant_message.tool_calls else None
103107

104108
converted_tool_calls = None
105109
if tool_calls:
@@ -136,6 +140,7 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
136140
Message(
137141
role="assistant",
138142
content=assistant_content,
143+
reasoning_content=reasoning_content,
139144
tool_calls=converted_tool_calls,
140145
)
141146
]

0 commit comments

Comments
 (0)