-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.cpp
More file actions
203 lines (173 loc) · 8.12 KB
/
main.cpp
File metadata and controls
203 lines (173 loc) · 8.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
#define _CRT_SECURE_NO_WARNINGS
#include "llama.h"
#include <iostream>
#include <vector>
#include <string>
#include <cstring>
#include <cstdlib> // 用于 system() 函数
// --- Helper: convert a single token id to its UTF-8 text piece ---
// Tries a small stack-sized buffer first; a negative return value from
// llama_token_to_piece encodes the required buffer size, so we resize
// once and retry. Returns "" if conversion still fails.
std::string my_token_to_piece(const llama_vocab* vocab, llama_token token) {
    std::vector<char> result(8, 0);
    int n = llama_token_to_piece(vocab, token, result.data(), result.size(), 0, true);
    if (n < 0) {
        // -n is the exact size needed; grow and retry once.
        result.resize(-n);
        n = llama_token_to_piece(vocab, token, result.data(), result.size(), 0, true);
    }
    if (n < 0) {
        // Still failing: constructing std::string with a negative length is UB,
        // so return an empty piece instead.
        return std::string();
    }
    return std::string(result.data(), n);
}
// --- Helper: tokenize a UTF-8 string into llama tokens ---
// First guess: worst case is roughly one token per byte plus BOS/EOS.
// A negative return from llama_tokenize encodes the required token count,
// so we resize once and retry. Returns an empty vector on failure.
std::vector<llama_token> my_tokenize(const llama_vocab* vocab, const std::string& text, bool add_special, bool parse_special) {
    int n_tokens = text.length() + 2;
    std::vector<llama_token> tokens(n_tokens);
    n_tokens = llama_tokenize(vocab, text.c_str(), text.length(), tokens.data(), tokens.size(), add_special, parse_special);
    if (n_tokens < 0) {
        // -n_tokens is the exact count needed; grow and retry once.
        tokens.resize(-n_tokens);
        n_tokens = llama_tokenize(vocab, text.c_str(), text.length(), tokens.data(), tokens.size(), add_special, parse_special);
    }
    if (n_tokens < 0) {
        // Still failing: resizing with a negative count would throw/UB.
        return std::vector<llama_token>();
    }
    tokens.resize(n_tokens);
    return tokens;
}
int main() {
// 1. 初始化
llama_backend_init();
llama_numa_init(GGML_NUMA_STRATEGY_DISABLED);
llama_model_params model_params = llama_model_default_params();
model_params.n_gpu_layers = 99;
// 你的模型绝对路径 (请确认路径无误)
const char* model_path = "D:/0VS_user/Local-Agent-Runner/models/Meta-Llama-3-8B-Instruct-v0.1.Q4_K_M.gguf";
std::cout << "Loading model..." << std::endl;
llama_model* model = llama_model_load_from_file(model_path, model_params);
if (!model) {
std::cerr << "Failed to load model!" << std::endl;
return 1;
}
const llama_vocab* vocab = llama_model_get_vocab(model);
llama_context_params ctx_params = llama_context_default_params();
ctx_params.n_ctx = 8192;
llama_context* ctx = llama_init_from_model(model, ctx_params);
// ==========================================
// 2. 核心修改:Teach AI to use tools (Agent Protocol)
// ==========================================
/* std::string system_prompt =
"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
"You are an intelligent Agent capable of controlling the Windows Terminal.\n"
"If the user asks to perform a file operation (create folder, list files, etc.), "
"you MUST output the Windows CMD command wrapped inside '<<<' and '>>>'.\n"
"Example 1: User 'Create a folder named test', You: 'OK. <<< mkdir test >>>'\n"
"Example 2: User 'List current files', You: 'Sure. <<< dir >>>'\n"
"Do not use markdown code blocks for commands. Only use the <<< ... >>> format."
"<|eot_id|>";*/
// ==========================================
// 2. 核心修改:升级版 Agent 协议 (支持中文、多步操作、写文件)
// ==========================================
std::string system_prompt =
"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
"You are an advanced Windows Terminal Agent. You can execute CMD commands based on user requests.\n"
"Rules:\n"
"1. If the user asks for an operation, output the valid Windows CMD command wrapped in '<<<' and '>>>'.\n"
"2. For multi-step operations, use '&&' to chain commands.\n"
"3. To write content to a file, use 'echo content > filename'.\n"
"4. You must handle Chinese paths and instructions correctly.\n"
"5. Always use 'mkdir -p' logic: When creating nested files, ensure the directory exists first. Example: 'mkdir path && echo content > path\\file'."
"\n"
"Examples:\n"
"User: 'Create a folder named data inside test'\n"
"AI: 'Sure. <<< mkdir test\\data >>>'\n"
"\n"
"User: 'Write \"hello\" to log.txt'\n"
"AI: 'OK. <<< echo hello > log.txt >>>'\n"
"\n"
"User: 'Create folder A and enter it'\n"
"AI: 'Done. <<< mkdir A && cd A >>>'\n"
"\n"
"User: '在D盘test文件夹下新建data.txt并写入123'\n"
"AI: '没问题。 <<< mkdir D:\\test && echo 123 > D:\\test\\data.txt >>>'\n"
"<|eot_id|>";
// 3. 预处理 System Prompt
std::vector<llama_token> tokens = my_tokenize(vocab, system_prompt, false, true);
llama_batch batch = llama_batch_init(8192, 0, 1);
for(size_t i=0; i<tokens.size(); i++){
batch.token[i] = tokens[i];
batch.pos[i] = i;
batch.n_seq_id[i] = 1;
batch.seq_id[i][0] = 0;
batch.logits[i] = false;
}
batch.n_tokens = tokens.size();
if (llama_decode(ctx, batch) != 0) return 1;
int n_past = batch.n_tokens;
std::cout << "\n=== Agent Ready. Try 'Create a folder named my_project' ===\n" << std::endl;
while (true) {
std::cout << "\n> User: ";
std::string user_input;
std::getline(std::cin, user_input);
if (user_input == "exit" || user_input == "quit") break;
std::string formatted_input = "<|start_header_id|>user<|end_header_id|>\n\n" + user_input + "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n";
tokens = my_tokenize(vocab, formatted_input, false, true);
batch.n_tokens = 0;
for(size_t i=0; i<tokens.size(); i++){
batch.token[i] = tokens[i];
batch.pos[i] = n_past + i;
batch.n_seq_id[i] = 1;
batch.seq_id[i][0] = 0;
batch.logits[i] = (i == tokens.size() - 1);
}
batch.n_tokens = tokens.size();
if (llama_decode(ctx, batch) != 0) break;
n_past += batch.n_tokens;
std::cout << "> AI: ";
std::string full_response = ""; // 用于缓存完整的回复,以便提取命令
while (true) {
auto* logits = llama_get_logits_ith(ctx, batch.n_tokens - 1);
auto n_vocab_size = llama_vocab_n_tokens(vocab);
llama_token new_token_id = 0;
float max_prob = -1e9;
for (int j = 0; j < n_vocab_size; j++) {
if (logits[j] > max_prob) {
max_prob = logits[j];
new_token_id = j;
}
}
if (llama_vocab_is_eog(vocab, new_token_id)) break;
std::string piece = my_token_to_piece(vocab, new_token_id);
std::cout << piece << std::flush;
full_response += piece; // 累加回复
batch.n_tokens = 0;
batch.token[0] = new_token_id;
batch.pos[0] = n_past;
batch.n_seq_id[0] = 1;
batch.seq_id[0][0] = 0;
batch.logits[0] = true;
batch.n_tokens = 1;
if (llama_decode(ctx, batch) != 0) break;
n_past++;
}
std::cout << std::endl;
// ==========================================
// 4. 解析并执行命令 (Agent Execution)
// ==========================================
size_t start_pos = full_response.find("<<<");
size_t end_pos = full_response.find(">>>");
if (start_pos != std::string::npos && end_pos != std::string::npos && end_pos > start_pos) {
// 提取命令字符串
std::string cmd = full_response.substr(start_pos + 3, end_pos - start_pos - 3);
// 去除首尾空格
size_t first = cmd.find_first_not_of(" \t");
size_t last = cmd.find_last_not_of(" \t");
if (first != std::string::npos) {
cmd = cmd.substr(first, (last - first + 1));
std::cout << "\n[System] Detecting command... Executing: " << cmd << std::endl;
std::cout << "------------------------------------------------" << std::endl;
// 【高危操作】真正执行命令
int ret = system(cmd.c_str());
std::cout << "------------------------------------------------" << std::endl;
std::cout << "[System] Execution finished with code: " << ret << std::endl;
}
}
}
llama_batch_free(batch);
llama_free(ctx);
llama_model_free(model);
llama_backend_free();
return 0;
}