Skip to content

Commit 477db4a

Browse files
authored
Merge pull request #90 from l3utterfly/master
merge from upstream
2 parents 21499b0 + 17a4258 commit 477db4a

File tree

212 files changed

+3342
-1144
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

212 files changed

+3342
-1144
lines changed

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ Maintainers reserve the right to decline review or close pull requests for any r
159159
160160
# Code maintenance
161161
162-
- Existing code should have designated collaborators and/or maintainers specified in the [CODEOWNERS](CODEOWNERS) file reponsible for:
162+
- Existing code should have designated collaborators and/or maintainers specified in the [CODEOWNERS](CODEOWNERS) file responsible for:
163163
- Reviewing and merging related PRs
164164
- Fixing related bugs
165165
- Providing developer guidance/support

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
287287
| [IBM zDNN](docs/backend/zDNN.md) | IBM Z & LinuxONE |
288288
| [WebGPU [In Progress]](docs/build.md#webgpu) | All |
289289
| [RPC](https://github.com/ggml-org/llama.cpp/tree/master/tools/rpc) | All |
290-
| [Hexagon [In Progress]](docs/backend/hexagon/README.md) | Snapdragon |
290+
| [Hexagon [In Progress]](docs/backend/snapdragon/README.md) | Snapdragon |
291291
| [VirtGPU](docs/backend/VirtGPU.md) | VirtGPU APIR |
292292

293293
## Obtaining and quantizing models

common/arg.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2399,7 +2399,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
23992399
params.fit_params = false;
24002400
} else {
24012401
throw std::runtime_error(
2402-
string_format("error: unkown value for --fit: '%s'\n", value.c_str()));
2402+
string_format("error: unknown value for --fit: '%s'\n", value.c_str()));
24032403
}
24042404
}
24052405
).set_env("LLAMA_ARG_FIT"));

common/common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -869,7 +869,7 @@ std::string common_detokenize(
869869
// Embedding utils
870870
//
871871

872-
// TODO: repace embd_norm with an enum
872+
// TODO: replace embd_norm with an enum
873873
void common_embd_normalize(const float * inp, float * out, int n, int embd_norm);
874874

875875
float common_embd_similarity_cos(const float * embd1, const float * embd2, int n);

common/console.cpp

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ namespace console {
8080
static termios initial_state;
8181
#endif
8282

83+
static completion_callback completion_cb = nullptr;
84+
8385
//
8486
// Init and cleanup
8587
//
@@ -493,7 +495,7 @@ namespace console {
493495
}
494496

495497
static void set_line_contents(std::string new_line, std::string & line, std::vector<int> & widths, size_t & char_pos,
496-
size_t & byte_pos) {
498+
size_t & byte_pos, int cursor_byte_pos = -1) {
497499
move_to_line_start(char_pos, byte_pos, widths);
498500
clear_current_line(widths);
499501

@@ -503,6 +505,7 @@ namespace console {
503505
char_pos = 0;
504506

505507
size_t idx = 0;
508+
int back_width = 0;
506509
while (idx < line.size()) {
507510
size_t advance = 0;
508511
char32_t cp = decode_utf8(line, idx, advance);
@@ -511,8 +514,15 @@ namespace console {
511514
if (real_width < 0) real_width = 0;
512515
widths.push_back(real_width);
513516
idx += advance;
514-
++char_pos;
515-
byte_pos = idx;
517+
if (cursor_byte_pos >= 0 && static_cast<size_t>(cursor_byte_pos) < idx) {
518+
back_width += real_width;
519+
} else {
520+
++char_pos;
521+
byte_pos = idx;
522+
}
523+
}
524+
if (cursor_byte_pos >= 0) {
525+
move_cursor(-back_width);
516526
}
517527
}
518528

@@ -784,6 +794,20 @@ namespace console {
784794
break;
785795
}
786796

797+
if (completion_cb && input_char == '\t') {
798+
auto candidates = completion_cb(line, byte_pos);
799+
800+
if (!candidates.empty()) {
801+
if (candidates.size() > 1 || candidates[0].first != line) {
802+
// TODO?: Display all candidates
803+
set_line_contents(candidates[0].first, line, widths, char_pos, byte_pos, candidates[0].second);
804+
} else {
805+
// TODO: Move cursor to new byte_pos
806+
}
807+
continue;
808+
}
809+
}
810+
787811
if (input_char == (char32_t) WEOF || input_char == 0x04 /* Ctrl+D */) {
788812
end_of_stream = true;
789813
break;
@@ -1062,6 +1086,10 @@ namespace console {
10621086
return readline_advanced(line, multiline_input);
10631087
}
10641088

1089+
void set_completion_callback(completion_callback cb) {
1090+
completion_cb = cb;
1091+
}
1092+
10651093
namespace spinner {
10661094
static const char LOADING_CHARS[] = {'|', '/', '-', '\\'};
10671095
static std::condition_variable cv_stop;

common/console.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44

55
#include "common.h"
66

7+
#include <functional>
78
#include <string>
9+
#include <vector>
810

911
enum display_type {
1012
DISPLAY_TYPE_RESET = 0,
@@ -21,6 +23,9 @@ namespace console {
2123
void set_display(display_type display);
2224
bool readline(std::string & line, bool multiline_input);
2325

26+
using completion_callback = std::function<std::vector<std::pair<std::string, size_t>>(std::string_view, size_t)>;
27+
void set_completion_callback(completion_callback cb);
28+
2429
namespace spinner {
2530
void start();
2631
void stop();

common/debug.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ template <bool abort_on_nan> void common_debug_print_tensor(uint8_t * data, ggml
1818
// prints tensors that are processed in the computation graph
1919
// by default prints all tensors, but can be configured by creating a `base_callback_data` instance with
2020
// non-empty filter_patterns. See examples/debug.ccp for possible usage patterns
21-
// The template parameter determins whether an error should be thrown whenever a NaN is encountered
21+
// The template parameter determines whether an error should be thrown whenever a NaN is encountered
2222
// in a tensor (useful for stopping debug sessions on first erroneous tensor)
2323
// The callback data will be passed as the third parameter (user_data)
2424
template <bool abort_on_nan> bool common_debug_cb_eval(struct ggml_tensor * t, bool ask, void * user_data);

common/jinja/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ The llama.cpp Jinja engine introduces `jinja::string` (see `jinja/string.h`), wh
6363
- **One-to-many** (e.g., split): result is marked `is_input` **only if ALL** input parts are marked `is_input`
6464
- **Many-to-one** (e.g., join): same as one-to-many
6565

66-
For string concatenation, string parts will be appended to the new string as-is, while perserving the `is_input` flag.
66+
For string concatenation, string parts will be appended to the new string as-is, while preserving the `is_input` flag.
6767

6868
**Enabling Input Marking:**
6969

convert_hf_to_gguf.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4031,7 +4031,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
40314031
# split Conv3D into Conv2Ds
40324032
c1, c2, kt, kh, kw = data_torch.shape
40334033
del c1, c2, kh, kw # unused
4034-
assert kt == 2, "Current implmentation only support temporal_patch_size of 2"
4034+
assert kt == 2, "Current implementation only support temporal_patch_size of 2"
40354035
yield (gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".weight" , data_torch[:, :, 0, ...])
40364036
yield (gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".weight.1", data_torch[:, :, 1, ...])
40374037
else:
@@ -4842,12 +4842,12 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
48424842
yield from super().modify_tensors(data_torch, name, bid)
48434843

48444844

4845-
@ModelBase.register("Qwen3_5ForConditionalGeneration")
4845+
@ModelBase.register("Qwen3_5ForConditionalGeneration", "Qwen3_5ForCausalLM")
48464846
class Qwen3_5TextModel(_LinearAttentionVReorderBase):
48474847
model_arch = gguf.MODEL_ARCH.QWEN35
48484848

48494849

4850-
@ModelBase.register("Qwen3_5MoeForConditionalGeneration")
4850+
@ModelBase.register("Qwen3_5MoeForConditionalGeneration", "Qwen3_5MoeForCausalLM")
48514851
class Qwen3_5MoeTextModel(_LinearAttentionVReorderBase):
48524852
model_arch = gguf.MODEL_ARCH.QWEN35MOE
48534853

@@ -5404,7 +5404,7 @@ def set_gguf_parameters(self):
54045404
# Get ssm_d_conv from linear_attn_config.short_conv_kernel_size or ssm_d_conv
54055405
linear_attn_config = self.hparams["linear_attn_config"]
54065406
# n_head == 0 for KDA layers, n_head > 0 for MLA layers
5407-
# full_attention_layers list will be used to distingush layer type
5407+
# full_attention_layers list will be used to distinguish layer type
54085408
_num_kv_heads = list()
54095409
_full_attn_layers = linear_attn_config["full_attn_layers"]
54105410
for il in range(self.hparams["num_hidden_layers"]):
@@ -6505,7 +6505,7 @@ def set_gguf_parameters(self):
65056505
super().set_gguf_parameters()
65066506
hparams = self.hparams
65076507
self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.GEMMA3)
6508-
# default values below are taken from HF tranformers code
6508+
# default values below are taken from HF transformers code
65096509
self.gguf_writer.add_vision_attention_layernorm_eps(hparams.get("layer_norm_eps", 1e-6))
65106510
self.gguf_writer.add_vision_use_gelu(True)
65116511
# calculate proj_scale_factor (used by tinygemma3 test model)
@@ -7097,7 +7097,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
70977097

70987098
if bid == 0 and "time_mix_a" in new_name:
70997099
# dummy v0/v1/v2 on first layer
7100-
# easist way to make llama happy
7100+
# easiest way to make llama happy
71017101
yield (new_name.replace("time_mix_a", "time_mix_v"), data_torch)
71027102

71037103
yield (new_name, data_torch)
@@ -9596,7 +9596,7 @@ def __init__(self, *args, **kwargs):
95969596
# NOTE: Explicitly include hparam prefix prefix for d_model to
95979597
# disambiguate with top-level head_dim
95989598
# NOTE 2: If needed for future models, this can be isolated in a method
9599-
# to separate the prefix setting and teh keys used
9599+
# to separate the prefix setting and the keys used
96009600
self.d_model = self.find_hparam([f"{self.hparam_prefixes[0]}_head_dim", "hidden_size", "d_model"])
96019601
self.n_group = self.find_hparam(["n_groups", "num_groups"])
96029602
self.d_inner = self.find_hparam(["expand", "num_heads"]) * self.d_model
@@ -9743,7 +9743,7 @@ def set_gguf_parameters(self):
97439743
self.gguf_writer.add_value_length(self.head_dim)
97449744

97459745
# Set feed_forward_length
9746-
# NOTE: This will trigger an override warning. This is preferrable to
9746+
# NOTE: This will trigger an override warning. This is preferable to
97479747
# duplicating all the parent logic
97489748
if not self.is_moe:
97499749
n_ff = self.find_hparam(["intermediate_size", "n_inner", "hidden_dim"])

docs/backend/CANN.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
**Llama.cpp + CANN**
2222

23-
The llama.cpp CANN backend is designed to support Ascend NPU. It utilize the ability of AscendC and ACLNN which are intergrated to CANN Toolkit and kernels to using Ascend NPU directly.
23+
The llama.cpp CANN backend is designed to support Ascend NPU. It utilize the ability of AscendC and ACLNN which are integrated to CANN Toolkit and kernels to using Ascend NPU directly.
2424

2525
## News
2626

@@ -210,7 +210,7 @@ docker run --name llamacpp --device /dev/davinci0 --device /dev/davinci_manager
210210
# and install driver.
211211
sudo sh Ascend-hdk-910b-npu-firmware_x.x.x.x.X.run --full
212212
```
213-
If the following messaage appers, firmware is installed successfully.
213+
If the following message appears, firmware is installed successfully.
214214
```sh
215215
Firmware package installed successfully!
216216
```

0 commit comments

Comments (0)