From 9b7619088f2035326a00d4d9477f964fbae3660d Mon Sep 17 00:00:00 2001
From: Rivaldo Freitas de Carvalho <rivaldo.freitas.106@gmail.com>
Date: Tue, 19 May 2026 10:47:16 -0300
Subject: [PATCH 1/5] Workaround to accept tool calls.

---
 patches/llama-server-tools.patch | 258 +++++++++++++++++++++++++++++++
 setup_env.py                     |   4 +
 utils/apply_local_patches.py     | 160 +++++++++++++++++++
 3 files changed, 422 insertions(+)
 create mode 100644 patches/llama-server-tools.patch
 create mode 100644 utils/apply_local_patches.py

diff --git a/patches/llama-server-tools.patch b/patches/llama-server-tools.patch
new file mode 100644
index 000000000..181ddfa2c
--- /dev/null
+++ b/patches/llama-server-tools.patch
@@ -0,0 +1,258 @@
+diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
+index 69519ef9..2f4e4866 100644
+--- a/examples/server/utils.hpp
++++ b/examples/server/utils.hpp
+@@ -65,9 +65,21 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
+         std::string role = json_value(curr_msg, "role", std::string(""));
+ 
+         std::string content;
+-        if (curr_msg.contains("content")) {
++        if (role == "tool") {
++            // Most GGUF chat templates do not define a native "tool" role.
++            // Present tool output as a user-visible observation for the next
++            // assistant turn instead of relying on template-specific behavior.
++            role = "user";
++            content = "Tool result";
++            if (curr_msg.contains("tool_call_id") && curr_msg["tool_call_id"].is_string()) {
++                content += " for " + curr_msg["tool_call_id"].get<std::string>();
++            }
++            content += ":\n";
++        }
++
++        if (curr_msg.contains("content") && !curr_msg["content"].is_null()) {
+             if (curr_msg["content"].is_string()) {
+-                content = curr_msg["content"].get<std::string>();
++                content += curr_msg["content"].get<std::string>();
+             } else if (curr_msg["content"].is_array()) {
+                 for (const auto & part : curr_msg["content"]) {
+                     if (part.contains("text")) {
+@@ -77,10 +89,18 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
+             } else {
+                 throw std::runtime_error("Invalid 'content' type (ref: https://github.com/ggerganov/llama.cpp/issues/8367)");
+             }
++        } else if (curr_msg.contains("tool_calls")) {
++            content += "Tool calls:\n" + curr_msg["tool_calls"].dump();
+         } else {
+             throw std::runtime_error("Missing 'content' (ref: https://github.com/ggerganov/llama.cpp/issues/8367)");
+         }
+ 
++        if (json_value(curr_msg, "role", std::string("")) == "tool") {
++            content += "\n\nUse the tool result above to answer the user's request now. "
++                       "Do not call another tool. Do not output a tool name, an Input line, "
++                       "tool_calls JSON, or any action syntax.";
++        }
++
+         chat.push_back({role, content});
+     }
+ 
+@@ -316,6 +336,134 @@ static bool server_sent_event(httplib::DataSink & sink, const char * event, cons
+     return sink.write(str.c_str(), str.size());
+ }
+ 
++static std::string trim(const std::string & str) {
++    const auto first = str.find_first_not_of(" \t\n\r");
++    if (first == std::string::npos) {
++        return "";
++    }
++
++    const auto last = str.find_last_not_of(" \t\n\r");
++    return str.substr(first, last - first + 1);
++}
++
++static json normalize_tool_call_arguments(const json & args) {
++    if (args.is_string()) {
++        try {
++            return json::parse(args.get<std::string>());
++        } catch (const json::parse_error &) {
++            return json::object({{"input", args.get<std::string>()}});
++        }
++    }
++
++    if (args.is_object()) {
++        return args;
++    }
++
++    if (args.is_null()) {
++        return json::object();
++    }
++
++    return json::object({{"value", args}});
++}
++
++static json normalize_tool_call(const json & call, const std::string & completion_id, size_t index) {
++    std::string name;
++    json arguments = json::object();
++
++    if (call.contains("function") && call["function"].is_object()) {
++        const auto & fn = call["function"];
++        name = json_value(fn, "name", std::string());
++        arguments = normalize_tool_call_arguments(json_value(fn, "arguments", json::object()));
++    } else {
++        name = json_value(call, "name", std::string());
++        arguments = normalize_tool_call_arguments(json_value(call, "arguments", json::object()));
++    }
++
++    if (name.empty()) {
++        throw std::runtime_error("Tool call is missing function name");
++    }
++
++    return json{
++        {"id", json_value(call, "id", completion_id + "_tool_" + std::to_string(index))},
++        {"type", "function"},
++        {"function", json{
++            {"name", name},
++            {"arguments", arguments.dump()},
++        }},
++    };
++}
++
++static json parse_tool_calls_from_content(const std::string & content, const std::string & completion_id) {
++    std::string text = trim(content);
++
++    if (text.rfind("```json", 0) == 0) {
++        text = trim(text.substr(7));
++    } else if (text.rfind("```", 0) == 0) {
++        text = trim(text.substr(3));
++    }
++    if (text.size() >= 3 && text.substr(text.size() - 3) == "```") {
++        text = trim(text.substr(0, text.size() - 3));
++    }
++
++    json parsed;
++    try {
++        parsed = json::parse(text);
++    } catch (const json::parse_error &) {
++        return json::array();
++    }
++
++    json calls = json::array();
++    const json * tool_calls_json = nullptr;
++    if (parsed.contains("tool_calls") && parsed["tool_calls"].is_array()) {
++        tool_calls_json = &parsed["tool_calls"];
++    } else if (parsed.contains("tool_call") && parsed["tool_call"].is_array()) {
++        tool_calls_json = &parsed["tool_call"];
++    }
++
++    if (tool_calls_json != nullptr) {
++        for (size_t i = 0; i < tool_calls_json->size(); ++i) {
++            try {
++                calls.push_back(normalize_tool_call((*tool_calls_json)[i], completion_id, i));
++            } catch (const std::runtime_error &) {
++                return json::array();
++            }
++        }
++    } else if (parsed.contains("function") || parsed.contains("name")) {
++        try {
++            calls.push_back(normalize_tool_call(parsed, completion_id, 0));
++        } catch (const std::runtime_error &) {
++            return json::array();
++        }
++    }
++
++    return calls;
++}
++
++static std::string tools_prompt(const json & body) {
++    if (!body.contains("tools") || !body["tools"].is_array() || body["tools"].empty()) {
++        return "";
++    }
++
++    if (body.contains("tool_choice") && body["tool_choice"].is_string() && body["tool_choice"].get<std::string>() == "none") {
++        return "";
++    }
++
++    std::ostringstream ss;
++    ss << "You can call tools when needed. Available tools are provided as JSON below.\n";
++    ss << body["tools"].dump(2) << "\n\n";
++    ss << "If a tool is required, respond only with strict JSON in this exact shape:\n";
++    ss << "{\"tool_calls\":[{\"name\":\"tool_name\",\"arguments\":{\"arg\":\"value\"}}]}\n";
++    ss << "The key must be exactly \"tool_calls\". Do not use \"tool_call\".\n";
++    ss << "Do not include markdown or explanatory text around a tool call. ";
++    ss << "If no tool is required, answer normally.";
++
++    if (body.contains("tool_choice") && body["tool_choice"].is_object()) {
++        ss << "\nThe requested tool_choice is:\n" << body["tool_choice"].dump(2);
++    }
++
++    return ss.str();
++}
++
+ //
+ // OAI utils
+ //
+@@ -329,7 +477,13 @@ static json oaicompat_completion_params_parse(
+     llama_params["__oaicompat"] = true;
+ 
+     // Apply chat template to the list of messages
+-    llama_params["prompt"] = format_chat(model, chat_template, body.at("messages"));
++    json messages = body.at("messages");
++    const std::string tool_instructions = tools_prompt(body);
++    if (!tool_instructions.empty()) {
++        messages.insert(messages.begin(), json{{"role", "system"}, {"content", tool_instructions}});
++        llama_params["__oaicompat_tools"] = body["tools"];
++    }
++    llama_params["prompt"] = format_chat(model, chat_template, messages);
+ 
+     // Handle "stop" field
+     if (body.contains("stop") && body.at("stop").is_string()) {
+@@ -367,7 +521,7 @@ static json oaicompat_completion_params_parse(
+     }
+ 
+     // Params supported by OAI but unsupported by llama.cpp
+-    static const std::vector<std::string> unsupported_params { "tools", "tool_choice" };
++    static const std::vector<std::string> unsupported_params {};
+     for (const auto & param : unsupported_params) {
+         if (body.contains(param)) {
+             throw std::runtime_error("Unsupported param: " + param);
+@@ -378,6 +532,9 @@ static json oaicompat_completion_params_parse(
+     // This allows user to use llama.cpp-specific params like "mirostat", "tfs_z",... via OAI endpoint.
+     // See "launch_slot_with_task()" for a complete list of params supported by llama.cpp
+     for (const auto & item : body.items()) {
++        if (item.key() == "tools" || item.key() == "tool_choice") {
++            continue;
++        }
+         // Exception: if "n_predict" is present, we overwrite the value specified earlier by "max_tokens"
+         if (!llama_params.contains(item.key()) || item.key() == "n_predict") {
+             llama_params[item.key()] = item.value();
+@@ -399,14 +556,33 @@ static json format_final_response_oaicompat(const json & request, const json & r
+         finish_reason = "stop";
+     }
+ 
+-    json choices =
+-        streaming ? json::array({json{{"finish_reason", finish_reason},
+-                                        {"index", 0},
+-                                        {"delta", json::object()}}})
+-                  : json::array({json{{"finish_reason", finish_reason},
+-                                        {"index", 0},
+-                                        {"message", json{{"content", content},
+-                                                         {"role", "assistant"}}}}});
++    json choices;
++    json tool_calls = json::array();
++    if (!streaming) {
++        // Some agent runtimes describe tools in the prompt instead of sending
++        // the OpenAI "tools" parameter. Still upgrade strict tool-call JSON
++        // content into OpenAI-compatible message.tool_calls.
++        // This keeps n8n/LangChain agents from treating the call as plain text.
++        tool_calls = parse_tool_calls_from_content(content, completion_id);
++    }
++
++    if (!tool_calls.empty()) {
++        finish_reason = "tool_calls";
++        choices = json::array({json{{"finish_reason", finish_reason},
++                                    {"index", 0},
++                                    {"message", json{{"content", nullptr},
++                                                     {"role", "assistant"},
++                                                     {"tool_calls", tool_calls}}}}});
++    } else {
++        choices =
++            streaming ? json::array({json{{"finish_reason", finish_reason},
++                                            {"index", 0},
++                                            {"delta", json::object()}}})
++                      : json::array({json{{"finish_reason", finish_reason},
++                                            {"index", 0},
++                                            {"message", json{{"content", content},
++                                                             {"role", "assistant"}}}}});
++    }
+ 
+     std::time_t t = std::time(0);
+ 
diff --git a/setup_env.py b/setup_env.py
index 3bf5fb8f7..21e9edb43 100644
--- a/setup_env.py
+++ b/setup_env.py
@@ -215,7 +215,11 @@ def compile():
     # run_command(["cmake", "--build", "build", "--target", "llama-cli", "--config", "Release"])
     run_command(["cmake", "--build", "build", "--config", "Release"], log_step="compile")
 
+def apply_local_patches():
+    run_command([sys.executable, "utils/apply_local_patches.py"], log_step="apply_local_patches")
+
 def main():
+    apply_local_patches()
     setup_gguf()
     gen_code()
     compile()
diff --git a/utils/apply_local_patches.py b/utils/apply_local_patches.py
new file mode 100644
index 000000000..9f2846d2c
--- /dev/null
+++ b/utils/apply_local_patches.py
@@ -0,0 +1,160 @@
+import sys
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parents[1]
+SERVER_CPP = ROOT / "3rdparty" / "llama.cpp" / "examples" / "server" / "server.cpp"
+LLAMA_CPP = ROOT / "3rdparty" / "llama.cpp"
+PATCHES = ROOT / "patches"
+
+OLD_CORS_BLOCK = """    // CORS preflight
+    svr->Options(R\"(.*)\", [](const httplib::Request &, httplib::Response & res) {
+        // Access-Control-Allow-Origin is already set by middleware
+        res.set_header(\"Access-Control-Allow-Credentials\", \"true\");
+        res.set_header(\"Access-Control-Allow-Methods\",     \"POST\");
+        res.set_header(\"Access-Control-Allow-Headers\",     \"*\");
+        return res.set_content(\"\", \"text/html\"); // blank response, no data
+    });
+"""
+
+NEW_CORS_BLOCK = """    // CORS preflight
+    svr->Options(R\"(.*)\", [](const httplib::Request & req, httplib::Response & res) {
+        // Access-Control-Allow-Origin is already set by middleware
+        res.set_header(\"Access-Control-Allow-Credentials\", \"true\");
+        res.set_header(\"Access-Control-Allow-Methods\",     \"GET, POST, OPTIONS\");
+
+        const auto requested_headers = req.get_header_value(\"Access-Control-Request-Headers\");
+        if (!requested_headers.empty()) {
+            res.set_header(\"Access-Control-Allow-Headers\", requested_headers);
+        } else {
+            res.set_header(\"Access-Control-Allow-Headers\", \"*\");
+        }
+
+        return res.set_content(\"\", \"text/html\"); // blank response, no data
+    });
+"""
+
+
+def ensure_server_cors_patch() -> None:
+    if not SERVER_CPP.exists():
+        print(f"Skipping llama.cpp CORS patch: file not found at {SERVER_CPP}")
+        return
+
+    content = SERVER_CPP.read_text(encoding="utf-8")
+    if NEW_CORS_BLOCK in content:
+        print("llama.cpp CORS patch already applied")
+        return
+
+    if OLD_CORS_BLOCK not in content:
+        print("Failed to locate original CORS block in server.cpp", file=sys.stderr)
+        sys.exit(1)
+
+    SERVER_CPP.write_text(content.replace(OLD_CORS_BLOCK, NEW_CORS_BLOCK, 1), encoding="utf-8")
+    print("Applied llama.cpp CORS patch")
+
+
+def parse_hunk_header(line: str) -> int:
+    old_range = line.split(" ", 2)[1]
+    return int(old_range[1:].split(",", 1)[0])
+
+
+def apply_unified_patch(patch: Path) -> bool:
+    patch_lines = patch.read_text(encoding="utf-8").splitlines()
+    i = 0
+    applied_any = False
+
+    while i < len(patch_lines):
+        if not patch_lines[i].startswith("diff --git "):
+            i += 1
+            continue
+
+        i += 1
+        while i < len(patch_lines) and not patch_lines[i].startswith("--- "):
+            i += 1
+        if i >= len(patch_lines):
+            return False
+
+        i += 1
+        if i >= len(patch_lines) or not patch_lines[i].startswith("+++ b/"):
+            return False
+
+        rel_path = patch_lines[i][6:]
+        target = LLAMA_CPP / rel_path
+        if not target.exists():
+            print(f"Patch target not found: {target}", file=sys.stderr)
+            return False
+
+        original = target.read_text(encoding="utf-8")
+        newline = "\r\n" if "\r\n" in original else "\n"
+        content = original.splitlines()
+        offset = 0
+
+        i += 1
+        while i < len(patch_lines):
+            if patch_lines[i].startswith("diff --git "):
+                break
+            if not patch_lines[i].startswith("@@ "):
+                i += 1
+                continue
+
+            start = parse_hunk_header(patch_lines[i]) - 1 + offset
+            i += 1
+            old_lines: list[str] = []
+            new_lines: list[str] = []
+
+            while i < len(patch_lines) and not patch_lines[i].startswith("@@ ") and not patch_lines[i].startswith("diff --git "):
+                line = patch_lines[i]
+                if line.startswith("\\ No newline"):
+                    i += 1
+                    continue
+
+                marker = line[:1]
+                value = line[1:]
+                if marker == " ":
+                    old_lines.append(value)
+                    new_lines.append(value)
+                elif marker == "-":
+                    old_lines.append(value)
+                elif marker == "+":
+                    new_lines.append(value)
+                else:
+                    return False
+                i += 1
+
+            if content[start:start + len(old_lines)] != old_lines:
+                return False
+
+            content[start:start + len(old_lines)] = new_lines
+            offset += len(new_lines) - len(old_lines)
+
+        target.write_text(newline.join(content) + newline, encoding="utf-8")
+        applied_any = True
+
+    return applied_any
+
+
+def apply_patch_file(patch: Path) -> None:
+    if not patch.exists():
+        print(f"Skipping patch: file not found at {patch}")
+        return
+
+    if apply_unified_patch(patch):
+        print(f"Applied {patch.name}")
+        return
+
+    utils_hpp = LLAMA_CPP / "examples" / "server" / "utils.hpp"
+    if utils_hpp.exists() and "tools_prompt(const json & body)" in utils_hpp.read_text(encoding="utf-8"):
+        print(f"{patch.name} already applied")
+        return
+
+    print(f"Failed to apply {patch.name}", file=sys.stderr)
+    sys.exit(1)
+
+
+def main() -> None:
+    ensure_server_cors_patch()
+    apply_patch_file(PATCHES / "llama-server-tools.patch")
+
+
+if __name__ == "__main__":
+    main()

From 7a0f8afaa97ea36232448dcb5180ae2b1eade815 Mon Sep 17 00:00:00 2001
From: Rivaldo Freitas de Carvalho <rivaldo.freitas.106@gmail.com>
Date: Tue, 19 May 2026 11:29:16 -0300
Subject: [PATCH 2/5] feat: enhance CORS patching and refactor patch
 application logic

---
 patches/llama-server-tools.patch |  16 ++--
 utils/apply_local_patches.py     | 145 +++++++++++++++++++++++--------
 2 files changed, 120 insertions(+), 41 deletions(-)

diff --git a/patches/llama-server-tools.patch b/patches/llama-server-tools.patch
index 181ddfa2c..dd90b3e67 100644
--- a/patches/llama-server-tools.patch
+++ b/patches/llama-server-tools.patch
@@ -45,7 +45,7 @@ index 69519ef9..2f4e4866 100644
          chat.push_back({role, content});
      }
  
-@@ -316,6 +336,134 @@ static bool server_sent_event(httplib::DataSink & sink, const char * event, cons
+@@ -316,6 +336,137 @@ static bool server_sent_event(httplib::DataSink & sink, const char * event, cons
      return sink.write(str.c_str(), str.size());
  }
  
@@ -126,11 +126,15 @@ index 69519ef9..2f4e4866 100644
 +    }
 +
 +    json calls = json::array();
++    json normalized_tool_calls = json::array();
 +    const json * tool_calls_json = nullptr;
 +    if (parsed.contains("tool_calls") && parsed["tool_calls"].is_array()) {
 +        tool_calls_json = &parsed["tool_calls"];
 +    } else if (parsed.contains("tool_call") && parsed["tool_call"].is_array()) {
 +        tool_calls_json = &parsed["tool_call"];
++    } else if (parsed.contains("tool_call") && parsed["tool_call"].is_object()) {
++        normalized_tool_calls.push_back(parsed["tool_call"]);
++        tool_calls_json = &normalized_tool_calls;
 +    }
 +
 +    if (tool_calls_json != nullptr) {
@@ -166,7 +170,7 @@ index 69519ef9..2f4e4866 100644
 +    ss << body["tools"].dump(2) << "\n\n";
 +    ss << "If a tool is required, respond only with strict JSON in this exact shape:\n";
 +    ss << "{\"tool_calls\":[{\"name\":\"tool_name\",\"arguments\":{\"arg\":\"value\"}}]}\n";
-+    ss << "The key must be exactly \"tool_calls\". Do not use \"tool_call\".\n";
++    ss << "Prefer the \"tool_calls\" array key. A single \"tool_call\" object is accepted for compatibility.\n";
 +    ss << "Do not include markdown or explanatory text around a tool call. ";
 +    ss << "If no tool is required, answer normally.";
 +
@@ -180,7 +184,7 @@ index 69519ef9..2f4e4866 100644
  //
  // OAI utils
  //
-@@ -329,7 +477,13 @@ static json oaicompat_completion_params_parse(
+@@ -329,7 +480,13 @@ static json oaicompat_completion_params_parse(
      llama_params["__oaicompat"] = true;
  
      // Apply chat template to the list of messages
@@ -195,7 +199,7 @@ index 69519ef9..2f4e4866 100644
  
      // Handle "stop" field
      if (body.contains("stop") && body.at("stop").is_string()) {
-@@ -367,7 +521,7 @@ static json oaicompat_completion_params_parse(
+@@ -367,7 +524,7 @@ static json oaicompat_completion_params_parse(
      }
  
      // Params supported by OAI but unsupported by llama.cpp
@@ -204,7 +208,7 @@ index 69519ef9..2f4e4866 100644
      for (const auto & param : unsupported_params) {
          if (body.contains(param)) {
              throw std::runtime_error("Unsupported param: " + param);
-@@ -378,6 +532,9 @@ static json oaicompat_completion_params_parse(
+@@ -378,6 +535,9 @@ static json oaicompat_completion_params_parse(
      // This allows user to use llama.cpp-specific params like "mirostat", "tfs_z",... via OAI endpoint.
      // See "launch_slot_with_task()" for a complete list of params supported by llama.cpp
      for (const auto & item : body.items()) {
@@ -214,7 +218,7 @@ index 69519ef9..2f4e4866 100644
          // Exception: if "n_predict" is present, we overwrite the value specified earlier by "max_tokens"
          if (!llama_params.contains(item.key()) || item.key() == "n_predict") {
              llama_params[item.key()] = item.value();
-@@ -399,14 +556,33 @@ static json format_final_response_oaicompat(const json & request, const json & r
+@@ -399,14 +559,33 @@ static json format_final_response_oaicompat(const json & request, const json & r
          finish_reason = "stop";
      }
  
diff --git a/utils/apply_local_patches.py b/utils/apply_local_patches.py
index 9f2846d2c..eeeba66cb 100644
--- a/utils/apply_local_patches.py
+++ b/utils/apply_local_patches.py
@@ -1,5 +1,7 @@
 import sys
+from dataclasses import dataclass
 from pathlib import Path
+from typing import Optional
 
 
 ROOT = Path(__file__).resolve().parents[1]
@@ -7,16 +9,6 @@
 LLAMA_CPP = ROOT / "3rdparty" / "llama.cpp"
 PATCHES = ROOT / "patches"
 
-OLD_CORS_BLOCK = """    // CORS preflight
-    svr->Options(R\"(.*)\", [](const httplib::Request &, httplib::Response & res) {
-        // Access-Control-Allow-Origin is already set by middleware
-        res.set_header(\"Access-Control-Allow-Credentials\", \"true\");
-        res.set_header(\"Access-Control-Allow-Methods\",     \"POST\");
-        res.set_header(\"Access-Control-Allow-Headers\",     \"*\");
-        return res.set_content(\"\", \"text/html\"); // blank response, no data
-    });
-"""
-
 NEW_CORS_BLOCK = """    // CORS preflight
     svr->Options(R\"(.*)\", [](const httplib::Request & req, httplib::Response & res) {
         // Access-Control-Allow-Origin is already set by middleware
@@ -35,21 +27,55 @@
 """
 
 
+@dataclass
+class PatchHunk:
+    old_start: int
+    old_lines: list[str]
+    new_lines: list[str]
+
+
+@dataclass
+class FilePatch:
+    target: Path
+    hunks: list[PatchHunk]
+
+
 def ensure_server_cors_patch() -> None:
     if not SERVER_CPP.exists():
         print(f"Skipping llama.cpp CORS patch: file not found at {SERVER_CPP}")
         return
 
     content = SERVER_CPP.read_text(encoding="utf-8")
-    if NEW_CORS_BLOCK in content:
+    cors_comment = "    // CORS preflight"
+    start = content.find(cors_comment)
+    if start == -1:
+        print("Failed to locate CORS preflight block in server.cpp", file=sys.stderr)
+        sys.exit(1)
+
+    end_marker = "    });"
+    end = content.find(end_marker, start)
+    if end == -1:
+        print("Failed to locate end of CORS preflight block in server.cpp", file=sys.stderr)
+        sys.exit(1)
+    end += len(end_marker)
+
+    current_block = content[start:end]
+    if "Access-Control-Request-Headers" in current_block:
         print("llama.cpp CORS patch already applied")
         return
 
-    if OLD_CORS_BLOCK not in content:
-        print("Failed to locate original CORS block in server.cpp", file=sys.stderr)
+    required_markers = (
+        "svr->Options",
+        "httplib::Request &",
+        "httplib::Response & res",
+        'res.set_header("Access-Control-Allow-Methods"',
+        'res.set_header("Access-Control-Allow-Headers"',
+    )
+    if not all(marker in current_block for marker in required_markers):
+        print("Failed to locate expected CORS preflight lines in server.cpp", file=sys.stderr)
         sys.exit(1)
 
-    SERVER_CPP.write_text(content.replace(OLD_CORS_BLOCK, NEW_CORS_BLOCK, 1), encoding="utf-8")
+    SERVER_CPP.write_text(content[:start] + NEW_CORS_BLOCK.rstrip("\n") + content[end:], encoding="utf-8")
     print("Applied llama.cpp CORS patch")
 
 
@@ -58,10 +84,10 @@ def parse_hunk_header(line: str) -> int:
     return int(old_range[1:].split(",", 1)[0])
 
 
-def apply_unified_patch(patch: Path) -> bool:
+def parse_unified_patch(patch: Path) -> Optional[list[FilePatch]]:
     patch_lines = patch.read_text(encoding="utf-8").splitlines()
     i = 0
-    applied_any = False
+    file_patches: list[FilePatch] = []
 
     while i < len(patch_lines):
         if not patch_lines[i].startswith("diff --git "):
@@ -76,20 +102,16 @@ def apply_unified_patch(patch: Path) -> bool:
 
         i += 1
         if i >= len(patch_lines) or not patch_lines[i].startswith("+++ b/"):
-            return False
+            return None
 
         rel_path = patch_lines[i][6:]
         target = LLAMA_CPP / rel_path
         if not target.exists():
             print(f"Patch target not found: {target}", file=sys.stderr)
-            return False
-
-        original = target.read_text(encoding="utf-8")
-        newline = "\r\n" if "\r\n" in original else "\n"
-        content = original.splitlines()
-        offset = 0
+            return None
 
         i += 1
+        hunks: list[PatchHunk] = []
         while i < len(patch_lines):
             if patch_lines[i].startswith("diff --git "):
                 break
@@ -97,7 +119,7 @@ def apply_unified_patch(patch: Path) -> bool:
                 i += 1
                 continue
 
-            start = parse_hunk_header(patch_lines[i]) - 1 + offset
+            old_start = parse_hunk_header(patch_lines[i])
             i += 1
             old_lines: list[str] = []
             new_lines: list[str] = []
@@ -118,19 +140,73 @@ def apply_unified_patch(patch: Path) -> bool:
                 elif marker == "+":
                     new_lines.append(value)
                 else:
-                    return False
+                    return None
                 i += 1
 
-            if content[start:start + len(old_lines)] != old_lines:
-                return False
+            hunks.append(PatchHunk(old_start=old_start, old_lines=old_lines, new_lines=new_lines))
+
+        file_patches.append(FilePatch(target=target, hunks=hunks))
+
+    return file_patches
+
+
+def simulate_file_patch(file_patch: FilePatch) -> tuple[str, Optional[str]]:
+    original = file_patch.target.read_text(encoding="utf-8")
+    newline = "\r\n" if "\r\n" in original else "\n"
+    original_lines = original.splitlines()
+    patched_lines = original_lines.copy()
+    apply_offset = 0
+    already_offset = 0
+    can_apply = True
+    already_applied = True
+
+    for hunk in file_patch.hunks:
+        apply_start = hunk.old_start - 1 + apply_offset
+        if patched_lines[apply_start:apply_start + len(hunk.old_lines)] == hunk.old_lines:
+            patched_lines[apply_start:apply_start + len(hunk.old_lines)] = hunk.new_lines
+            apply_offset += len(hunk.new_lines) - len(hunk.old_lines)
+        else:
+            can_apply = False
+
+        already_start = hunk.old_start - 1 + already_offset
+        if original_lines[already_start:already_start + len(hunk.new_lines)] != hunk.new_lines:
+            already_applied = False
+        already_offset += len(hunk.new_lines) - len(hunk.old_lines)
 
-            content[start:start + len(old_lines)] = new_lines
-            offset += len(new_lines) - len(old_lines)
+    if can_apply:
+        return "apply", newline.join(patched_lines) + newline
+    if already_applied:
+        return "already", None
+    return "failed", None
 
-        target.write_text(newline.join(content) + newline, encoding="utf-8")
+
+def apply_unified_patch(patch: Path) -> str:
+    file_patches = parse_unified_patch(patch)
+    if not file_patches:
+        return "failed"
+
+    pending_writes: list[tuple[Path, str]] = []
+    applied_any = False
+    already_count = 0
+
+    for file_patch in file_patches:
+        status, new_content = simulate_file_patch(file_patch)
+        if status == "failed":
+            return "failed"
+        if status == "already":
+            already_count += 1
+            continue
+        pending_writes.append((file_patch.target, new_content or ""))
         applied_any = True
 
-    return applied_any
+    if applied_any and already_count:
+        print(f"Refusing to partially apply {patch.name}: some hunks are already present", file=sys.stderr)
+        return "failed"
+
+    for target, new_content in pending_writes:
+        target.write_text(new_content, encoding="utf-8")
+
+    return "applied" if applied_any else "already"
 
 
 def apply_patch_file(patch: Path) -> None:
@@ -138,12 +214,11 @@ def apply_patch_file(patch: Path) -> None:
         print(f"Skipping patch: file not found at {patch}")
         return
 
-    if apply_unified_patch(patch):
+    patch_status = apply_unified_patch(patch)
+    if patch_status == "applied":
         print(f"Applied {patch.name}")
         return
-
-    utils_hpp = LLAMA_CPP / "examples" / "server" / "utils.hpp"
-    if utils_hpp.exists() and "tools_prompt(const json & body)" in utils_hpp.read_text(encoding="utf-8"):
+    if patch_status == "already":
         print(f"{patch.name} already applied")
         return
 

From dca304ac6d25e51ce746bab67ab05a080cbc70f5 Mon Sep 17 00:00:00 2001
From: Rivaldo Freitas de Carvalho <rivaldo.freitas.106@gmail.com>
Date: Tue, 19 May 2026 11:58:14 -0300
Subject: [PATCH 3/5] Address Copilot patch review feedback

---
 patches/llama-server-tools.patch | 17 +++++++++++------
 setup_env.py                     |  7 +++++++
 utils/apply_local_patches.py     | 12 +++++++++---
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/patches/llama-server-tools.patch b/patches/llama-server-tools.patch
index dd90b3e67..8a0617773 100644
--- a/patches/llama-server-tools.patch
+++ b/patches/llama-server-tools.patch
@@ -45,7 +45,7 @@ index 69519ef9..2f4e4866 100644
          chat.push_back({role, content});
      }
  
-@@ -316,6 +336,137 @@ static bool server_sent_event(httplib::DataSink & sink, const char * event, cons
+@@ -316,6 +336,143 @@ static bool server_sent_event(httplib::DataSink & sink, const char * event, cons
      return sink.write(str.c_str(), str.size());
  }
  
@@ -125,6 +125,10 @@ index 69519ef9..2f4e4866 100644
 +        return json::array();
 +    }
 +
++    if (!parsed.is_object()) {
++        return json::array();
++    }
++
 +    json calls = json::array();
 +    json normalized_tool_calls = json::array();
 +    const json * tool_calls_json = nullptr;
@@ -145,7 +149,8 @@ index 69519ef9..2f4e4866 100644
 +                return json::array();
 +            }
 +        }
-+    } else if (parsed.contains("function") || parsed.contains("name")) {
++    } else if ((parsed.contains("function") && parsed["function"].is_object() && parsed["function"].contains("name"))
++               || (parsed.contains("name") && parsed["name"].is_string() && parsed.contains("arguments"))) {
 +        try {
 +            calls.push_back(normalize_tool_call(parsed, completion_id, 0));
 +        } catch (const std::runtime_error &) {
@@ -184,7 +189,7 @@ index 69519ef9..2f4e4866 100644
  //
  // OAI utils
  //
-@@ -329,7 +480,13 @@ static json oaicompat_completion_params_parse(
+@@ -329,7 +486,13 @@ static json oaicompat_completion_params_parse(
      llama_params["__oaicompat"] = true;
  
      // Apply chat template to the list of messages
@@ -199,7 +204,7 @@ index 69519ef9..2f4e4866 100644
  
      // Handle "stop" field
      if (body.contains("stop") && body.at("stop").is_string()) {
-@@ -367,7 +524,7 @@ static json oaicompat_completion_params_parse(
+@@ -367,7 +530,7 @@ static json oaicompat_completion_params_parse(
      }
  
      // Params supported by OAI but unsupported by llama.cpp
@@ -208,7 +213,7 @@ index 69519ef9..2f4e4866 100644
      for (const auto & param : unsupported_params) {
          if (body.contains(param)) {
              throw std::runtime_error("Unsupported param: " + param);
-@@ -378,6 +535,9 @@ static json oaicompat_completion_params_parse(
+@@ -378,6 +541,9 @@ static json oaicompat_completion_params_parse(
      // This allows user to use llama.cpp-specific params like "mirostat", "tfs_z",... via OAI endpoint.
      // See "launch_slot_with_task()" for a complete list of params supported by llama.cpp
      for (const auto & item : body.items()) {
@@ -218,7 +223,7 @@ index 69519ef9..2f4e4866 100644
          // Exception: if "n_predict" is present, we overwrite the value specified earlier by "max_tokens"
          if (!llama_params.contains(item.key()) || item.key() == "n_predict") {
              llama_params[item.key()] = item.value();
-@@ -399,14 +559,33 @@ static json format_final_response_oaicompat(const json & request, const json & r
+@@ -399,14 +565,33 @@ static json format_final_response_oaicompat(const json & request, const json & r
          finish_reason = "stop";
      }
  
diff --git a/setup_env.py b/setup_env.py
index 21e9edb43..0352cc679 100644
--- a/setup_env.py
+++ b/setup_env.py
@@ -216,6 +216,13 @@ def compile():
     run_command(["cmake", "--build", "build", "--config", "Release"], log_step="compile")
 
 def apply_local_patches():
+    llama_cpp = Path("3rdparty") / "llama.cpp"
+    server_cpp = llama_cpp / "examples" / "server" / "server.cpp"
+    utils_hpp = llama_cpp / "examples" / "server" / "utils.hpp"
+    if not llama_cpp.exists() or not server_cpp.exists() or not utils_hpp.exists():
+        logging.info("Skipping local llama.cpp patches: 3rdparty/llama.cpp server sources not found.")
+        return
+
     run_command([sys.executable, "utils/apply_local_patches.py"], log_step="apply_local_patches")
 
 def main():
diff --git a/utils/apply_local_patches.py b/utils/apply_local_patches.py
index eeeba66cb..912a972a2 100644
--- a/utils/apply_local_patches.py
+++ b/utils/apply_local_patches.py
@@ -75,7 +75,9 @@ def ensure_server_cors_patch() -> None:
         print("Failed to locate expected CORS preflight lines in server.cpp", file=sys.stderr)
         sys.exit(1)
 
-    SERVER_CPP.write_text(content[:start] + NEW_CORS_BLOCK.rstrip("\n") + content[end:], encoding="utf-8")
+    newline = "\r\n" if "\r\n" in content else "\n"
+    cors_block = NEW_CORS_BLOCK.rstrip("\n").replace("\n", newline)
+    SERVER_CPP.write_text(content[:start] + cors_block + content[end:], encoding="utf-8")
     print("Applied llama.cpp CORS patch")
 
 
@@ -98,7 +100,7 @@ def parse_unified_patch(patch: Path) -> Optional[list[FilePatch]]:
         while i < len(patch_lines) and not patch_lines[i].startswith("--- "):
             i += 1
         if i >= len(patch_lines):
-            return False
+            return None
 
         i += 1
         if i >= len(patch_lines) or not patch_lines[i].startswith("+++ b/"):
@@ -153,6 +155,7 @@ def parse_unified_patch(patch: Path) -> Optional[list[FilePatch]]:
 def simulate_file_patch(file_patch: FilePatch) -> tuple[str, Optional[str]]:
     original = file_patch.target.read_text(encoding="utf-8")
     newline = "\r\n" if "\r\n" in original else "\n"
+    has_trailing_newline = original.endswith(("\n", "\r"))
     original_lines = original.splitlines()
     patched_lines = original_lines.copy()
     apply_offset = 0
@@ -174,7 +177,10 @@ def simulate_file_patch(file_patch: FilePatch) -> tuple[str, Optional[str]]:
         already_offset += len(hunk.new_lines) - len(hunk.old_lines)
 
     if can_apply:
-        return "apply", newline.join(patched_lines) + newline
+        patched_content = newline.join(patched_lines)
+        if has_trailing_newline:
+            patched_content += newline
+        return "apply", patched_content
     if already_applied:
         return "already", None
     return "failed", None

From acd69a433d20d89dfe88697ba9a4c223832e4d8d Mon Sep 17 00:00:00 2001
From: Rivaldo Freitas de Carvalho <rivaldo.freitas.106@gmail.com>
Date: Tue, 19 May 2026 12:19:42 -0300
Subject: [PATCH 4/5] Handle escaped tool call JSON

---
 patches/llama-server-tools.patch | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/patches/llama-server-tools.patch b/patches/llama-server-tools.patch
index 8a0617773..bd4681685 100644
--- a/patches/llama-server-tools.patch
+++ b/patches/llama-server-tools.patch
@@ -45,7 +45,7 @@ index 69519ef9..2f4e4866 100644
          chat.push_back({role, content});
      }
  
-@@ -316,6 +336,143 @@ static bool server_sent_event(httplib::DataSink & sink, const char * event, cons
+@@ -316,6 +336,152 @@ static bool server_sent_event(httplib::DataSink & sink, const char * event, cons
      return sink.write(str.c_str(), str.size());
  }
  
@@ -125,6 +125,14 @@ index 69519ef9..2f4e4866 100644
 +        return json::array();
 +    }
 +
++    if (parsed.is_string()) {
++        try {
++            parsed = json::parse(trim(parsed.get<std::string>()));
++        } catch (const json::parse_error &) {
++            return json::array();
++        }
++    }
++
 +    if (!parsed.is_object()) {
 +        return json::array();
 +    }
@@ -189,7 +197,7 @@ index 69519ef9..2f4e4866 100644
  //
  // OAI utils
  //
-@@ -329,7 +486,13 @@ static json oaicompat_completion_params_parse(
+@@ -329,7 +495,13 @@ static json oaicompat_completion_params_parse(
      llama_params["__oaicompat"] = true;
  
      // Apply chat template to the list of messages
@@ -204,7 +212,7 @@ index 69519ef9..2f4e4866 100644
  
      // Handle "stop" field
      if (body.contains("stop") && body.at("stop").is_string()) {
-@@ -367,7 +530,7 @@ static json oaicompat_completion_params_parse(
+@@ -367,7 +539,7 @@ static json oaicompat_completion_params_parse(
      }
  
      // Params supported by OAI but unsupported by llama.cpp
@@ -213,7 +221,7 @@ index 69519ef9..2f4e4866 100644
      for (const auto & param : unsupported_params) {
          if (body.contains(param)) {
              throw std::runtime_error("Unsupported param: " + param);
-@@ -378,6 +541,9 @@ static json oaicompat_completion_params_parse(
+@@ -378,6 +550,9 @@ static json oaicompat_completion_params_parse(
      // This allows user to use llama.cpp-specific params like "mirostat", "tfs_z",... via OAI endpoint.
      // See "launch_slot_with_task()" for a complete list of params supported by llama.cpp
      for (const auto & item : body.items()) {
@@ -223,7 +231,7 @@ index 69519ef9..2f4e4866 100644
          // Exception: if "n_predict" is present, we overwrite the value specified earlier by "max_tokens"
          if (!llama_params.contains(item.key()) || item.key() == "n_predict") {
              llama_params[item.key()] = item.value();
-@@ -399,14 +565,33 @@ static json format_final_response_oaicompat(const json & request, const json & r
+@@ -399,14 +574,33 @@ static json format_final_response_oaicompat(const json & request, const json & r
          finish_reason = "stop";
      }
  

From 709ec298127b974195f7349cbeeb50b5621ea8b2 Mon Sep 17 00:00:00 2001
From: Rivaldo Freitas de Carvalho <rivaldo.freitas.106@gmail.com>
Date: Tue, 19 May 2026 12:42:15 -0300
Subject: [PATCH 5/5] Support streaming tool call conversion

---
 patches/llama-server-tools.patch | 127 +++++++++++++++++++++++++++----
 utils/apply_local_patches.py     |   5 ++
 2 files changed, 116 insertions(+), 16 deletions(-)

diff --git a/patches/llama-server-tools.patch b/patches/llama-server-tools.patch
index bd4681685..92d821a96 100644
--- a/patches/llama-server-tools.patch
+++ b/patches/llama-server-tools.patch
@@ -1,10 +1,106 @@
+diff --git a/examples/server/server.cpp b/examples/server/server.cpp
+index 18bcad3..520b558 100644
+--- a/examples/server/server.cpp
++++ b/examples/server/server.cpp
+@@ -3007,8 +3007,90 @@ int main(int argc, char ** argv) {
+
+             ctx_server.queue_results.remove_waiting_task_ids(task_ids);
+         } else {
+-            const auto chunked_content_provider = [task_ids, &ctx_server, completion_id](size_t, httplib::DataSink & sink) {
++            const bool stream_tools = data.contains("__oaicompat_tools");
++            const auto chunked_content_provider = [task_ids, &ctx_server, completion_id, stream_tools](size_t, httplib::DataSink & sink) {
++                std::string tool_stream_content;
++                bool sent_tool_role = false;
++
+                 ctx_server.receive_cmpl_results_stream(task_ids, [&](const server_task_result & result) -> bool {
++                    if (stream_tools) {
++                        std::time_t t = std::time(0);
++                        const std::string modelname = json_value(result.data, "model", std::string(DEFAULT_OAICOMPAT_MODEL));
++
++                        if (!sent_tool_role) {
++                            json role_chunk = json{
++                                {"choices", json::array({json{
++                                    {"finish_reason", nullptr},
++                                    {"index", 0},
++                                    {"delta", json{{"role", "assistant"}}}
++                                }})},
++                                {"created", t},
++                                {"id", completion_id},
++                                {"model", modelname},
++                                {"object", "chat.completion.chunk"}
++                            };
++                            if (!server_sent_event(sink, "data", role_chunk)) {
++                                return false;
++                            }
++                            sent_tool_role = true;
++                        }
++
++                        tool_stream_content += json_value(result.data, "content", std::string());
++
++                        const bool stopped_word  = json_value(result.data, "stopped_word",  false);
++                        const bool stopped_eos   = json_value(result.data, "stopped_eos",   false);
++                        const bool stopped_limit = json_value(result.data, "stopped_limit", false);
++                        if (!stopped_word && !stopped_eos && !stopped_limit) {
++                            return true;
++                        }
++
++                        std::string finish_reason = stopped_limit ? "length" : "stop";
++                        json tool_calls = parse_tool_calls_from_content(tool_stream_content, completion_id);
++                        if (!tool_calls.empty()) {
++                            finish_reason = "tool_calls";
++                            json tool_chunk = json{
++                                {"choices", json::array({json{
++                                    {"finish_reason", nullptr},
++                                    {"index", 0},
++                                    {"delta", json{{"tool_calls", tool_calls}}}
++                                }})},
++                                {"created", t},
++                                {"id", completion_id},
++                                {"model", modelname},
++                                {"object", "chat.completion.chunk"}
++                            };
++                            if (!server_sent_event(sink, "data", tool_chunk)) {
++                                return false;
++                            }
++                        } else if (!tool_stream_content.empty()) {
++                            json content_chunk = json{
++                                {"choices", json::array({json{
++                                    {"finish_reason", nullptr},
++                                    {"index", 0},
++                                    {"delta", json{{"content", tool_stream_content}}}
++                                }})},
++                                {"created", t},
++                                {"id", completion_id},
++                                {"model", modelname},
++                                {"object", "chat.completion.chunk"}
++                            };
++                            if (!server_sent_event(sink, "data", content_chunk)) {
++                                return false;
++                            }
++                        }
++
++                        json final_chunk = json{
++                            {"choices", json::array({json{{"finish_reason", finish_reason},
++                                                           {"index", 0},
++                                                           {"delta", json::object()}}})},
++                            {"created", t},
++                            {"id", completion_id},
++                            {"model", modelname},
++                            {"object", "chat.completion.chunk"}
++                        };
++                        return server_sent_event(sink, "data", final_chunk);
++                    }
++
+                     std::vector<json> result_array = format_partial_response_oaicompat(result.data, completion_id);
+                     for (auto & event_data : result_array) {
+                         if (event_data.empty()) {
 diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
-index 69519ef9..2f4e4866 100644
+index 69519ef..b5d5eea 100644
 --- a/examples/server/utils.hpp
 +++ b/examples/server/utils.hpp
 @@ -65,9 +65,21 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
          std::string role = json_value(curr_msg, "role", std::string(""));
- 
+
          std::string content;
 -        if (curr_msg.contains("content")) {
 +        if (role == "tool") {
@@ -35,7 +131,7 @@ index 69519ef9..2f4e4866 100644
          } else {
              throw std::runtime_error("Missing 'content' (ref: https://github.com/ggerganov/llama.cpp/issues/8367)");
          }
- 
+
 +        if (json_value(curr_msg, "role", std::string("")) == "tool") {
 +            content += "\n\nUse the tool result above to answer the user's request now. "
 +                       "Do not call another tool. Do not output a tool name, an Input line, "
@@ -44,11 +140,11 @@ index 69519ef9..2f4e4866 100644
 +
          chat.push_back({role, content});
      }
- 
-@@ -316,6 +336,152 @@ static bool server_sent_event(httplib::DataSink & sink, const char * event, cons
+
+@@ -316,6 +336,151 @@ static bool server_sent_event(httplib::DataSink & sink, const char * event, cons
      return sink.write(str.c_str(), str.size());
  }
- 
+
 +static std::string trim(const std::string & str) {
 +    const auto first = str.find_first_not_of(" \t\n\r");
 +    if (first == std::string::npos) {
@@ -197,9 +293,9 @@ index 69519ef9..2f4e4866 100644
  //
  // OAI utils
  //
-@@ -329,7 +495,13 @@ static json oaicompat_completion_params_parse(
+@@ -329,7 +494,13 @@ static json oaicompat_completion_params_parse(
      llama_params["__oaicompat"] = true;
- 
+
      // Apply chat template to the list of messages
 -    llama_params["prompt"] = format_chat(model, chat_template, body.at("messages"));
 +    json messages = body.at("messages");
@@ -209,19 +305,19 @@ index 69519ef9..2f4e4866 100644
 +        llama_params["__oaicompat_tools"] = body["tools"];
 +    }
 +    llama_params["prompt"] = format_chat(model, chat_template, messages);
- 
+
      // Handle "stop" field
      if (body.contains("stop") && body.at("stop").is_string()) {
-@@ -367,7 +539,7 @@ static json oaicompat_completion_params_parse(
+@@ -367,7 +538,7 @@ static json oaicompat_completion_params_parse(
      }
- 
+
      // Params supported by OAI but unsupported by llama.cpp
 -    static const std::vector<std::string> unsupported_params { "tools", "tool_choice" };
 +    static const std::vector<std::string> unsupported_params {};
      for (const auto & param : unsupported_params) {
          if (body.contains(param)) {
              throw std::runtime_error("Unsupported param: " + param);
-@@ -378,6 +550,9 @@ static json oaicompat_completion_params_parse(
+@@ -378,6 +549,9 @@ static json oaicompat_completion_params_parse(
      // This allows user to use llama.cpp-specific params like "mirostat", "tfs_z",... via OAI endpoint.
      // See "launch_slot_with_task()" for a complete list of params supported by llama.cpp
      for (const auto & item : body.items()) {
@@ -231,10 +327,10 @@ index 69519ef9..2f4e4866 100644
          // Exception: if "n_predict" is present, we overwrite the value specified earlier by "max_tokens"
          if (!llama_params.contains(item.key()) || item.key() == "n_predict") {
              llama_params[item.key()] = item.value();
-@@ -399,14 +574,33 @@ static json format_final_response_oaicompat(const json & request, const json & r
+@@ -399,14 +573,33 @@ static json format_final_response_oaicompat(const json & request, const json & r
          finish_reason = "stop";
      }
- 
+
 -    json choices =
 -        streaming ? json::array({json{{"finish_reason", finish_reason},
 -                                        {"index", 0},
@@ -270,6 +366,5 @@ index 69519ef9..2f4e4866 100644
 +                                            {"message", json{{"content", content},
 +                                                             {"role", "assistant"}}}}});
 +    }
- 
+
      std::time_t t = std::time(0);
- 
diff --git a/utils/apply_local_patches.py b/utils/apply_local_patches.py
index 912a972a2..190a0639d 100644
--- a/utils/apply_local_patches.py
+++ b/utils/apply_local_patches.py
@@ -131,6 +131,11 @@ def parse_unified_patch(patch: Path) -> Optional[list[FilePatch]]:
                 if line.startswith("\\ No newline"):
                     i += 1
                     continue
+                if line == "":
+                    old_lines.append("")
+                    new_lines.append("")
+                    i += 1
+                    continue
 
                 marker = line[:1]
                 value = line[1:]