Skip to content

Commit 34ec1c3

Browse files
authored
server : merge contiguous Responses input items into a single assistant message (ggml-org#19773)
* server : merge contiguous input items into a single assistant message
* cont : simplify tool call msg
* cont : reduce and combine content
* cont : fix merging content items
1 parent e877ad8 commit 34ec1c3

1 file changed

Lines changed: 44 additions & 40 deletions

File tree

tools/server/server-common.cpp

Lines changed: 44 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -1105,6 +1105,8 @@ json convert_responses_to_chatcmpl(const json & response_body) {
11051105
};
11061106

11071107
for (json item : input_value) {
1108+
bool merge_prev = !chatcmpl_messages.empty() && chatcmpl_messages.back().value("role", "") == "assistant";
1109+
11081110
if (exists_and_is_string(item, "content")) {
11091111
// #responses_create-input-input_item_list-input_message-content-text_input
11101112
// Only "Input message" contains item["content"]::string
@@ -1193,7 +1195,7 @@ json convert_responses_to_chatcmpl(const json & response_body) {
11931195
item.at("type") == "message"
11941196
) {
11951197
// #responses_create-input-input_item_list-item-output_message
1196-
std::vector<json> chatcmpl_content;
1198+
auto chatcmpl_content = json::array();
11971199

11981200
for (const auto & output_text : item.at("content")) {
11991201
const std::string type = json_value(output_text, "type", std::string());
@@ -1210,35 +1212,47 @@ json convert_responses_to_chatcmpl(const json & response_body) {
12101212
});
12111213
}
12121214

1213-
item.erase("status");
1214-
item.erase("type");
1215-
item["content"] = chatcmpl_content;
1216-
chatcmpl_messages.push_back(item);
1215+
if (merge_prev) {
1216+
auto & prev_msg = chatcmpl_messages.back();
1217+
if (!exists_and_is_array(prev_msg, "content")) {
1218+
prev_msg["content"] = json::array();
1219+
}
1220+
auto & prev_content = prev_msg["content"];
1221+
prev_content.insert(prev_content.end(), chatcmpl_content.begin(), chatcmpl_content.end());
1222+
} else {
1223+
item.erase("status");
1224+
item.erase("type");
1225+
item["content"] = chatcmpl_content;
1226+
chatcmpl_messages.push_back(item);
1227+
}
12171228
} else if (exists_and_is_string(item, "arguments") &&
12181229
exists_and_is_string(item, "call_id") &&
12191230
exists_and_is_string(item, "name") &&
12201231
exists_and_is_string(item, "type") &&
12211232
item.at("type") == "function_call"
12221233
) {
12231234
// #responses_create-input-input_item_list-item-function_tool_call
1224-
json msg = json {
1225-
{"role", "assistant"},
1226-
{"tool_calls", json::array({ json {
1227-
{"function", json {
1228-
{"arguments", item.at("arguments")},
1229-
{"name", item.at("name")},
1230-
}},
1231-
{"id", item.at("call_id")},
1232-
{"type", "function"},
1233-
}})},
1235+
json tool_call = {
1236+
{"function", json {
1237+
{"arguments", item.at("arguments")},
1238+
{"name", item.at("name")},
1239+
}},
1240+
{"id", item.at("call_id")},
1241+
{"type", "function"},
12341242
};
12351243

1236-
if (!chatcmpl_messages.empty() && chatcmpl_messages.back().contains("reasoning_content")) {
1237-
// Move reasoning content from dummy message to tool call message
1238-
msg["reasoning_content"] = chatcmpl_messages.back().at("reasoning_content");
1239-
chatcmpl_messages.pop_back();
1244+
if (merge_prev) {
1245+
auto & prev_msg = chatcmpl_messages.back();
1246+
if (!exists_and_is_array(prev_msg, "tool_calls")) {
1247+
prev_msg["tool_calls"] = json::array();
1248+
}
1249+
prev_msg["tool_calls"].push_back(tool_call);
1250+
} else {
1251+
chatcmpl_messages.push_back(json {
1252+
{"role", "assistant"},
1253+
{"tool_calls", json::array({tool_call})}
1254+
});
12401255
}
1241-
chatcmpl_messages.push_back(msg);
12421256
} else if (exists_and_is_string(item, "call_id") &&
12431257
(exists_and_is_string(item, "output") || exists_and_is_array(item, "output")) &&
12441258
exists_and_is_string(item, "type") &&
@@ -1282,12 +1296,16 @@ json convert_responses_to_chatcmpl(const json & response_body) {
12821296
throw std::invalid_argument("item['content']['text'] is not a string");
12831297
}
12841298

1285-
// Pack reasoning content in dummy message
1286-
chatcmpl_messages.push_back(json {
1287-
{"role", "assistant"},
1288-
{"content", json::array()},
1289-
{"reasoning_content", item.at("content")[0].at("text")},
1290-
});
1299+
if (merge_prev) {
1300+
auto & prev_msg = chatcmpl_messages.back();
1301+
prev_msg["reasoning_content"] = item.at("content")[0].at("text");
1302+
} else {
1303+
chatcmpl_messages.push_back(json {
1304+
{"role", "assistant"},
1305+
{"content", json::array()},
1306+
{"reasoning_content", item.at("content")[0].at("text")},
1307+
});
1308+
}
12911309
} else {
12921310
throw std::invalid_argument("Cannot determine type of 'item'");
12931311
}
@@ -1296,20 +1314,6 @@ json convert_responses_to_chatcmpl(const json & response_body) {
12961314
throw std::invalid_argument("'input' must be a string or array of objects");
12971315
}
12981316

1299-
// Remove unused dummy message which contains
1300-
// reasoning content not followed by tool call
1301-
chatcmpl_messages.erase(std::remove_if(
1302-
chatcmpl_messages.begin(),
1303-
chatcmpl_messages.end(),
1304-
[](const json & x){ return x.contains("role") &&
1305-
x.at("role") == "assistant" &&
1306-
x.contains("content") &&
1307-
x.at("content") == json::array() &&
1308-
x.contains("reasoning_content");
1309-
}),
1310-
chatcmpl_messages.end()
1311-
);
1312-
13131317
chatcmpl_body["messages"] = chatcmpl_messages;
13141318

13151319
if (response_body.contains("tools")) {

0 commit comments

Comments
 (0)