Skip to content

Commit b2b199c

Browse files
committed
feat(json): introduce ujson bridge and dual-backend JSON support
1 parent 2e7a25f commit b2b199c

46 files changed

Lines changed: 17271 additions & 45 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/build.yml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ on:
99
jobs:
1010
build:
1111
runs-on: ubuntu-latest
12+
strategy:
13+
matrix:
14+
backend: [nlohmann, rapidjson]
1215

1316
steps:
1417
- uses: actions/checkout@v3
@@ -22,7 +25,11 @@ jobs:
2225
run: |
2326
mkdir build
2427
cd build
25-
cmake ..
28+
if [ "${{ matrix.backend }}" = "rapidjson" ]; then
29+
cmake .. -DUJSON_USE_RAPIDJSON=ON
30+
else
31+
cmake ..
32+
fi
2633
make
2734
2835
- name: Run Tests

CMakeLists.txt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,13 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
88
add_library(jinja INTERFACE)
99
target_include_directories(jinja INTERFACE . third_party)
1010

11+
option(UJSON_USE_RAPIDJSON "Use RapidJSON backend" OFF)
12+
if(UJSON_USE_RAPIDJSON)
13+
add_definitions(-DUJSON_USE_RAPIDJSON)
14+
include_directories(third_party/rapidjson/include)
15+
endif()
16+
1117
# Test
1218
enable_testing()
1319
add_executable(test_main tests/test_main.cpp)
14-
target_link_libraries(test_main jinja)
20+
target_link_libraries(test_main jinja)

README.md

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,21 @@ It focuses on supporting the subset of Jinja2 used by modern Large Language Mode
1111
## Features
1212

1313
- **C++11 Compatible**: Ensures maximum compatibility across older compiler versions and embedded systems.
14-
- **Lightweight**: Minimal dependencies (only `nlohmann/json`).
14+
- **Flexible JSON Backend**: Supports both `nlohmann/json` (default) and `RapidJSON` via a unified `ujson` bridge.
15+
- **Lightweight**: Minimal dependencies, with all required headers included in `third_party/`.
1516
- **LLM Focused**: Native support for `messages`, `tools`, `add_generation_prompt`, and special tokens.
16-
- **Strictly Typed**: Uses `nlohmann::json` for context management.
17+
- **Unified Context**: Uses `jinja::json` (an alias to `ujson::json`) for seamless context management.
1718
- **Custom Function Interop**: Easily inject C++ functions (e.g., `strftime_now`) into templates.
18-
- **Robust**: Validated against official Python `transformers` outputs using fuzzy matching tests.
19+
- **Robust**: Validated against official Python `transformers` outputs using fuzzy matching tests on 390+ cases.
1920

2021
## Integration
2122

22-
The library is a single header file. Just copy `jinja.hpp` to your project's include directory (or root).
23+
### Headers
24+
The library consists of two main headers:
25+
- `jinja.hpp`: Core template engine.
26+
- `third_party/ujson.hpp`: Unified JSON bridge.
27+
28+
Just copy `jinja.hpp` and the `third_party` directory into your project.
2329

2430
### Feature Checking (Versioning)
2531

@@ -58,6 +64,13 @@ cmake ..
5864
make
5965
```
6066

67+
### Enable RapidJSON Backend
68+
To use `RapidJSON` instead of `nlohmann/json` for better performance:
69+
```bash
70+
cmake .. -DUJSON_USE_RAPIDJSON=ON
71+
```
72+
*Note: Ensure `third_party/rapidjson` is available.*
73+
6174
### Run Tests
6275

6376
The project includes a comprehensive test suite based on real-world model templates.
@@ -78,7 +91,7 @@ int main() {
7891
std::string template_str = "Hello {{ name }}!";
7992
jinja::Template tpl(template_str);
8093

81-
nlohmann::json context;
94+
jinja::json context;
8295
context["name"] = "World";
8396

8497
std::string result = tpl.render(context);
@@ -96,15 +109,15 @@ int main() {
96109
std::string chat_template_str = "...";
97110
jinja::Template tpl(chat_template_str);
98111

99-
nlohmann::json messages = nlohmann::json::array({
112+
jinja::json messages = jinja::json::array({
100113
{{"role", "user"}, {"content", "Hello!"}}
101114
});
102115

103116
// Apply template
104117
std::string prompt = tpl.apply_chat_template(
105118
messages,
106119
true, // add_generation_prompt
107-
nlohmann::json::array() // tools
120+
jinja::json::array() // tools
108121
);
109122
```
110123

@@ -113,7 +126,7 @@ std::string prompt = tpl.apply_chat_template(
113126
You can register custom C++ functions to be called from within the template.
114127

115128
```cpp
116-
tpl.add_function("strftime_now", [](const std::vector<nlohmann::json>& args) {
129+
tpl.add_function("strftime_now", [](const std::vector<jinja::json>& args) {
117130
// Return current time string
118131
return "2025-12-16";
119132
});

README_CN.md

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,12 @@
1111
## 特性
1212

1313
- **C++11 兼容**:确保在旧版编译器和嵌入式系统上的最大兼容性。
14-
- **易于集成**核心库仅包含**一个头文件** (`jinja.hpp`,位于根目录),非常方便拷贝并集成到任何项目中
15-
- **轻量级**:依赖极少 (仅依赖 `nlohmann/json`,已包含在项目中)
14+
- **灵活的 JSON 后端**:通过统一的 `ujson` 桥接层支持 `nlohmann/json` (默认) 和 `RapidJSON`。
15+
- **轻量级**:依赖极少,所有必要头文件均已包含在 `third_party/` 目录下
1616
- **专注 LLM**:原生支持 `messages`, `tools`, `add_generation_prompt` 以及特殊 token 的处理。
17-
- **类型安全**:使用 `nlohmann::json` 进行上下文管理
17+
- **统一上下文**:使用 `jinja::json` (即 `ujson::json` 的别名) 进行无缝的上下文管理
1818
- **自定义函数**:支持轻松注入 C++ 函数 (如 `strftime_now`) 到模板中。
19-
- **健壮性**通过模糊匹配测试,与官方 Python `transformers` 输出进行对齐验证
19+
- **健壮性**:通过 390+ 条测试用例验证,与官方 Python `transformers` 输出进行对齐。
2020

2121
## 支持的模型
2222

@@ -43,6 +43,13 @@ cmake ..
4343
make
4444
```
4545

46+
### 开启 RapidJSON 后端
47+
为了获得更好的性能,可以切换到 `RapidJSON` 后端:
48+
```bash
49+
cmake .. -DUJSON_USE_RAPIDJSON=ON
50+
```
51+
*注:请确保 `third_party/rapidjson` 存在。*
52+
4653
### 运行测试
4754

4855
本项目包含一个基于真实模型模板的全面测试套件。
@@ -63,7 +70,7 @@ int main() {
6370
std::string template_str = "Hello {{ name }}!";
6471
jinja::Template tpl(template_str);
6572

66-
nlohmann::json context;
73+
jinja::json context;
6774
context["name"] = "World";
6875

6976
std::string result = tpl.render(context);
@@ -81,15 +88,15 @@ int main() {
8188
std::string chat_template_str = "...";
8289
jinja::Template tpl(chat_template_str);
8390

84-
nlohmann::json messages = nlohmann::json::array({
91+
jinja::json messages = jinja::json::array({
8592
{{"role", "user"}, {"content", "你好!"}}
8693
});
8794

8895
// 应用模板
8996
std::string prompt = tpl.apply_chat_template(
9097
messages,
9198
true, // add_generation_prompt
92-
nlohmann::json::array() // tools
99+
jinja::json::array() // tools
93100
);
94101
```
95102

@@ -98,7 +105,7 @@ std::string prompt = tpl.apply_chat_template(
98105
你可以注册自定义 C++ 函数,供模板内部调用。
99106

100107
```cpp
101-
tpl.add_function("strftime_now", [](const std::vector<nlohmann::json>& args) {
108+
tpl.add_function("strftime_now", [](const std::vector<jinja::json>& args) {
102109
// 返回当前时间字符串
103110
return "2025-12-16";
104111
});

doc/implementation_details.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ The engine follows a standard compiler/interpreter pipeline:
2727
3. **AST (`Node` hierarchy)**:
2828
* Base `Node` class with virtual `render(Context&, string& out)` method.
2929
* Nodes: `TextNode`, `PrintNode`, `ForStmt`, `IfNode`, `SetNode`, `MacroNode`.
30-
* Expressions (`Expr` hierarchy) evaluate to `nlohmann::json` values.
30+
* Expressions (`Expr` hierarchy) evaluate to `jinja::json` values.
3131

3232
4. **Interpreter / Renderer (`Template::render`)**:
3333
* Iterates through root nodes and calls `render`.
@@ -65,8 +65,11 @@ The engine follows a standard compiler/interpreter pipeline:
6565

6666
## Key Implementation Features
6767

68-
### 1. JSON Data Model
69-
We utilize `nlohmann::json` as the unified data type for all variables. This simplifies type checking and allows easy integration with JSON-based LLM APIs.
68+
### 1. Unified JSON Bridge (`ujson`)
69+
We utilize a custom bridge layer called `ujson` (Universal JSON) to abstract the underlying JSON library.
70+
* **Default**: Uses `nlohmann/json` for ease of use and standard compliance.
71+
* **High Performance**: Supports `RapidJSON` (via `UJSON_USE_RAPIDJSON`) for faster parsing and reduced memory overhead, which is critical for high-throughput LLM serving.
72+
* **Abstraction**: All internal logic uses `ujson::json`, exposed as `jinja::json` to the user.
7073

7174
### 2. Custom Function / Filter Dispatch
7275
* **Filters**: Implemented in `FilterExpr`. Standard Jinja2 filters like `safe`, `tojson`, `trim`, `lower` are hardcoded.

doc/implementation_details_CN.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
3. **抽象语法树 (AST - `Node` 层次结构)**:
2828
* 基类 `Node` 具有虚函数 `render(Context&, string& out)`
2929
* 节点类型:`TextNode`, `PrintNode`, `ForStmt`, `IfNode`, `SetNode`, `MacroNode`
30-
* 表达式 (`Expr` 层次结构) 计算结果为 `nlohmann::json` 值。
30+
* 表达式 (`Expr` 层次结构) 计算结果为 `jinja::json` 值。
3131

3232
4. **解释器 / 渲染器 (Interpreter / Renderer - `Template::render`)**:
3333
* 遍历根节点并调用 `render`
@@ -65,8 +65,11 @@
6565

6666
## 关键实现特性
6767

68-
### 1. JSON 数据模型
69-
我们使用 `nlohmann::json` 作为所有变量的统一数据类型。这简化了类型检查,并允许与基于 JSON 的 LLM API 轻松集成。
68+
### 1. 统一 JSON 桥接层 (`ujson`)
69+
我们实现了一个名为 `ujson` (Universal JSON) 的轻量级桥接层,用于抽象底层的 JSON 库。
70+
* **默认配置**:使用 `nlohmann/json`,兼顾易用性和标准兼容性。
71+
* **高性能需求**:支持 `RapidJSON` 后端 (通过 `UJSON_USE_RAPIDJSON` 开启),提供更快的解析速度和更低的内存开销,这对于高并发的 LLM 推理服务至关重要。
72+
* **抽象封装**:内部所有逻辑均基于 `ujson::json` 编写,并通过 `jinja::json` 别名暴露给用户。
7073

7174
### 2. 自定义函数 / 过滤器分发
7275
* **过滤器 (Filters)**: 在 `FilterExpr` 中实现。标准的 Jinja2 过滤器如 `safe`, `tojson`, `trim`, `lower` 是硬编码的。

jinja.hpp

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,23 @@
1515
limitations under the License.
1616
*/
1717

18-
#pragma once
19-
2018
#include <string>
21-
#include <memory>
2219
#include <vector>
2320
#include <map>
2421
#include <functional>
25-
#include <algorithm>
22+
#include <memory>
2623
#include <sstream>
24+
#include <algorithm>
2725
#include <iostream>
26+
#include <fstream>
27+
#include <regex>
28+
#include <initializer_list>
29+
#include <ctime>
30+
#include <iomanip>
31+
#include <chrono>
2832

2933
// External dependency: ujson bridge (nlohmann/json by default, RapidJSON optional)
30-
#include <nlohmann/json.hpp>
34+
#include "third_party/ujson.hpp"
3135

3236
#define JINJA_VERSION_MAJOR 0
3337
#define JINJA_VERSION_MINOR 0
@@ -42,8 +46,9 @@
4246

4347
namespace jinja {
4448

45-
using json = nlohmann::json;
46-
using json = nlohmann::json;
49+
using json = ujson::json;
50+
51+
4752
using Argument = std::pair<std::string, json>;
4853
using UserFunction = std::function<json(const std::vector<Argument>&)>;
4954

@@ -558,8 +563,6 @@ class Lexer {
558563

559564

560565
// --- Helpers ---
561-
using json = nlohmann::json;
562-
563566
class Context; // Forward declaration
564567

565568
struct Node {
@@ -571,7 +574,12 @@ struct Macro;
571574

572575
// Forward declarations
573576
static bool is_truthy(const json& val);
574-
static const json UNDEFINED = {{"__jinja_undefined__", true}};
577+
static json undefined_init() {
578+
json j = json::object();
579+
j["__jinja_undefined__"] = true;
580+
return j;
581+
}
582+
static const json UNDEFINED = undefined_init();
575583

576584
inline bool is_undefined(const json& val) {
577585
return val.is_object() && val.contains("__jinja_undefined__");
@@ -619,27 +627,25 @@ class Context {
619627
return nullptr;
620628
}
621629

622-
json& get(const std::string& name) {
630+
json get(const std::string& name) {
623631
// Search from top to bottom
624632
for (auto it = scopes.rbegin(); it != scopes.rend(); ++it) {
625633
if (it->contains(name)) {
626634
return (*it)[name];
627635
}
628636
}
629637
JINJA_LOG("Context: Variable '" << name << "' not found, returning UNDEFINED");
630-
static json undefined_val = UNDEFINED;
631-
return undefined_val;
638+
return UNDEFINED;
632639
}
633640

634-
const json& get(const std::string& name) const {
641+
json get(const std::string& name) const {
635642
// Const version
636643
for (auto it = scopes.rbegin(); it != scopes.rend(); ++it) {
637644
if (it->contains(name)) {
638645
return (*it)[name];
639646
}
640647
}
641-
static const json undefined_val = UNDEFINED;
642-
return undefined_val;
648+
return UNDEFINED;
643649
}
644650

645651
void set(const std::string& name, json val) {
@@ -1284,7 +1290,7 @@ struct SetNode : Node {
12841290
// But Expr::evaluate returns value. We need reference.
12851291
// Context needs to support getting reference.
12861292
if (auto* var = dynamic_cast<VarExpr*>(attr->object.get())) {
1287-
json& obj = context.get(var->name);
1293+
json obj = context.get(var->name);
12881294
if (!obj.is_null()) {
12891295
obj[attr->name] = val;
12901296
}

tests/test_main.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,9 @@
88
#include <regex>
99
// #define JINJA_DEBUG
1010
#include "jinja.hpp"
11-
#include <nlohmann/json.hpp>
1211
#include <chrono>
1312

14-
using json = nlohmann::json;
13+
using json = ujson::json;
1514

1615
namespace Color {
1716
const std::string RESET = "\033[0m";
@@ -92,7 +91,7 @@ int main(int argc, char** argv) {
9291
if (!model_filter.empty() && model_id.find(model_filter) == std::string::npos) {
9392
continue;
9493
}
95-
json& model_data = it.value();
94+
json model_data = it.value();
9695
total_models++;
9796

9897
std::cout << "\n" << Color::BLUE << Color::BOLD << "┏━━ Model: " << model_id << Color::RESET << std::endl;
@@ -225,4 +224,4 @@ int main(int argc, char** argv) {
225224
std::cout << "\n✨ All tests passed! ✨" << std::endl;
226225
return 0;
227226
}
228-
}
227+
}

0 commit comments

Comments
 (0)