diff --git a/CHANGELOG.md b/CHANGELOG.md
index be4ade61df..90d79abbfa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 
 - feat: Update llama.cpp to ggml-org/llama.cpp@d749821db
+- fix: model fails to load when chat template uses HuggingFace generation tags by @tobocop2 in #2226
 - docs: add contributing guide by @abetlen in #2229
 - chore: Migrate llama.cpp submodule URL to ggml-org/llama.cpp by @shalinib-ibm in #2034
 - fix: Enable unified KV cache for embedding contexts to preserve full per-sequence context in batch embedding calls by @SanjanaB123 in #2217
diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
index 1024fb85b9..f24b89f3ef 100644
--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -24,6 +24,7 @@
 )
 
 import jinja2
+from jinja2.ext import Extension
 from jinja2.sandbox import ImmutableSandboxedEnvironment
 
 import numpy as np
@@ -192,6 +193,15 @@ def __call__(
 
 
 class Jinja2ChatFormatter(ChatFormatter):
+    class IgnoreGenerationTags(Extension):
+        """Jinja2 extension: HuggingFace's ``{% generation %}`` block is a no-op wrapper around its body."""
+
+        tags = {"generation"}  # tag name that makes Jinja2 hand parsing to parse()
+
+        def parse(self, parser: jinja2.parser.Parser):
+            parser.stream.skip(1)  # step past the ``generation`` name token
+            return parser.parse_statements(("name:endgeneration",), drop_needle=True)  # body nodes; end tag dropped
+
     def __init__(
         self,
         template: str,
@@ -213,6 +223,7 @@ def __init__(
             loader=jinja2.BaseLoader(),
             trim_blocks=True,
             lstrip_blocks=True,
+            extensions=[Jinja2ChatFormatter.IgnoreGenerationTags],
         ).from_string(self.template)
 
     @staticmethod