|
2 | 2 | #include "llama.h" |
3 | 3 | #include "log.h" |
4 | 4 | #include "utils/nitro_utils.h" |
| 5 | +#include <thread> |
5 | 6 |
|
6 | 7 | using namespace inferences; |
7 | 8 | using json = nlohmann::json; |
@@ -293,20 +294,38 @@ void llamaCPP::chatCompletion( |
293 | 294 | LOG_INFO << "Current completion text"; |
294 | 295 | LOG_INFO << formatted_output; |
295 | 296 | #endif |
296 | | - const int task_id = llama.request_completion(data, false, false, -1); |
| 297 | + int task_id; |
| 298 | + |
| 299 | + if (llama.params.n_parallel == 1) { |
| 300 | + while (true) { |
| 301 | + if (!single_queue_is_busy) { |
| 302 | + task_id = llama.request_completion(data, false, false, -1); |
| 303 | + single_queue_is_busy = true; |
| 304 | + break; |
| 305 | + } else { |
| 306 | + std::this_thread::sleep_for( |
| 307 | + std::chrono::milliseconds(100)); // Sleep for 100 milliseconds |
| 308 | + } |
| 309 | + } |
| 310 | + } else { |
| 311 | + task_id = llama.request_completion(data, false, false, -1); |
| 312 | + } |
| 313 | + |
297 | 314 | LOG_INFO << "Resolved request for task_id:" << task_id; |
298 | 315 |
|
299 | 316 | if (is_streamed) { |
300 | 317 | auto state = createState(task_id, this); |
301 | 318 |
|
302 | 319 | auto chunked_content_provider = |
303 | | - [state](char *pBuffer, std::size_t nBuffSize) -> std::size_t { |
| 320 | + [this, state](char *pBuffer, std::size_t nBuffSize) -> std::size_t { |
304 | 321 | if (!pBuffer) { |
305 | 322 | LOG_INFO << "Connection closed or buffer is null. Reset context"; |
306 | 323 | state->instance->llama.request_cancel(state->task_id); |
| 324 | + single_queue_is_busy = false; |
307 | 325 | return 0; |
308 | 326 | } |
309 | 327 | if (state->isStopped) { |
| 328 | + single_queue_is_busy = false; |
310 | 329 | return 0; |
311 | 330 | } |
312 | 331 |
|
@@ -339,8 +358,10 @@ void llamaCPP::chatCompletion( |
339 | 358 | } |
340 | 359 | return nRead; |
341 | 360 | } else { |
| 361 | + single_queue_is_busy = false; |
342 | 362 | return 0; |
343 | 363 | } |
| 364 | + single_queue_is_busy = false; |
344 | 365 | return 0; |
345 | 366 | }; |
346 | 367 | auto resp = nitro_utils::nitroStreamResponse(chunked_content_provider, |
|
0 commit comments