Skip to content

Commit 403e5bd

Browse files
authored
Merge pull request #86 from l3utterfly/merge
merge from upstream
2 parents 67f6632 + be51510 commit 403e5bd

391 files changed

Lines changed: 112930 additions & 23753 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.devops/cann.Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
# Define the CANN base image for easier version updates later
66
ARG CHIP_TYPE=910b
7-
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc1.alpha001-${CHIP_TYPE}-openeuler22.03-py3.11
7+
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc2-${CHIP_TYPE}-openeuler24.03-py3.11
88

99
# ==============================================================================
1010
# BUILD STAGE
@@ -111,7 +111,7 @@ ENTRYPOINT ["/app/tools.sh"]
111111
# ==============================================================================
112112
FROM base AS light
113113

114-
COPY --from=build /app/full/llama-cli /app
114+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
115115

116116
ENTRYPOINT [ "/app/llama-cli" ]
117117

.devops/cpu.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ ENTRYPOINT ["/app/tools.sh"]
6868
### Light, CLI only
6969
FROM base AS light
7070

71-
COPY --from=build /app/full/llama-cli /app
71+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
7272

7373
WORKDIR /app
7474

.devops/cuda.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ ENTRYPOINT ["/app/tools.sh"]
7474
### Light, CLI only
7575
FROM base AS light
7676

77-
COPY --from=build /app/full/llama-cli /app
77+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
7878

7979
WORKDIR /app
8080

.devops/intel.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ ENTRYPOINT ["/app/tools.sh"]
7373
FROM base AS light
7474

7575
COPY --from=build /app/lib/ /app
76-
COPY --from=build /app/full/llama-cli /app
76+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
7777

7878
WORKDIR /app
7979

.devops/musa.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ ENTRYPOINT ["/app/tools.sh"]
8181
### Light, CLI only
8282
FROM base AS light
8383

84-
COPY --from=build /app/full/llama-cli /app
84+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
8585

8686
WORKDIR /app
8787

.devops/rocm.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ ENTRYPOINT ["/app/tools.sh"]
9494
### Light, CLI only
9595
FROM base AS light
9696

97-
COPY --from=build /app/full/llama-cli /app
97+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
9898

9999
WORKDIR /app
100100

.devops/s390x.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ WORKDIR /llama.cpp/bin
105105

106106
# Copy llama.cpp binaries and libraries
107107
COPY --from=collector /llama.cpp/bin/*.so /llama.cpp/bin
108-
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin
108+
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin/llama-completion /llama.cpp/bin
109109

110110
ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]
111111

.devops/tools.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
1313
exec ./llama-quantize "$@"
1414
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
1515
exec ./llama-cli "$@"
16+
elif [[ "$arg1" == '--run-legacy' || "$arg1" == '-l' ]]; then
17+
exec ./llama-completion "$@"
1618
elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
1719
exec ./llama-bench "$@"
1820
elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
@@ -32,8 +34,10 @@ elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
3234
else
3335
echo "Unknown command: $arg1"
3436
echo "Available commands: "
35-
echo " --run (-r): Run a model previously converted into ggml"
36-
echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
37+
echo " --run (-r): Run a model (chat) previously converted into ggml"
38+
echo " ex: -m /models/7B/ggml-model-q4_0.bin"
39+
echo " --run-legacy (-l): Run a model (legacy completion) previously converted into ggml"
40+
echo " ex: -m /models/7B/ggml-model-q4_0.bin -no-cnv -p \"Building a website can be done in 10 simple steps:\" -n 512"
3741
echo " --bench (-b): Benchmark the performance of the inference for various parameters."
3842
echo " ex: -m model.gguf"
3943
echo " --perplexity (-p): Measure the perplexity of a model over a given text."

.devops/vulkan.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ ENTRYPOINT ["/app/tools.sh"]
6868
### Light, CLI only
6969
FROM base AS light
7070

71-
COPY --from=build /app/full/llama-cli /app
71+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
7272

7373
WORKDIR /app
7474

.github/ISSUE_TEMPLATE/011-bug-results.yml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ body:
1111
(i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation.
1212
If you encountered the issue while using an external UI (e.g. ollama),
1313
please reproduce your issue using one of the examples/binaries in this repository.
14-
The `llama-cli` binary can be used for simple and reproducible model inference.
14+
The `llama-completion` binary can be used for simple and reproducible model inference.
1515
- type: textarea
1616
id: version
1717
attributes:
@@ -74,9 +74,12 @@ body:
7474
Please give us a summary of the problem and tell us how to reproduce it.
7575
If you can narrow down the bug to specific hardware, compile flags, or command line arguments,
7676
that information would be very much appreciated by us.
77+
78+
If possible, please try to reproduce the issue using `llama-completion` with `-fit off`.
79+
If you can only reproduce the issue with `-fit on`, please provide logs both with and without `--verbose`.
7780
placeholder: >
78-
e.g. when I run llama-cli with -ngl 99 I get garbled outputs.
79-
When I use -ngl 0 it works correctly.
81+
e.g. when I run llama-completion with `-fa on` I get garbled outputs for very long prompts.
82+
With short prompts or `-fa off` it works correctly.
8083
Here are the exact commands that I used: ...
8184
validations:
8285
required: true

0 commit comments

Comments
 (0)