Skip to content

Commit 3f4e0f6

Browse files
authored
Merge pull request #93 from l3utterfly/master
merge from upstream
2 parents e7e5f9a + d006858 commit 3f4e0f6

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

43 files changed

+4173
-8028
lines changed

.devops/cuda-new.Dockerfile

Lines changed: 0 additions & 97 deletions
This file was deleted.

.devops/nix/package.nix

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
rocmPackages,
1717
vulkan-headers,
1818
vulkan-loader,
19-
curl,
19+
openssl,
2020
shaderc,
2121
useBlas ?
2222
builtins.all (x: !x) [
@@ -160,7 +160,8 @@ effectiveStdenv.mkDerivation (finalAttrs: {
160160
++ optionals useMpi [ mpi ]
161161
++ optionals useRocm rocmBuildInputs
162162
++ optionals useBlas [ blas ]
163-
++ optionals useVulkan vulkanBuildInputs;
163+
++ optionals useVulkan vulkanBuildInputs
164+
++ [ openssl ];
164165

165166
cmakeFlags =
166167
[

.github/workflows/build-self-hosted.yml

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -213,6 +213,27 @@ jobs:
213213
vulkaninfo --summary
214214
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
215215
216+
ggml-ci-win-intel-vulkan:
217+
runs-on: [self-hosted, Windows, X64, Intel]
218+
219+
steps:
220+
- name: Clone
221+
id: checkout
222+
uses: actions/checkout@v6
223+
224+
- name: Test
225+
id: ggml-ci
226+
shell: C:\msys64\usr\bin\bash.exe --noprofile --norc -eo pipefail "{0}"
227+
env:
228+
MSYSTEM: UCRT64
229+
CHERE_INVOKING: 1
230+
PATH: C:\msys64\ucrt64\bin;C:\msys64\usr\bin;C:\Windows\System32;${{ env.PATH }}
231+
run: |
232+
vulkaninfo --summary
233+
# Skip python related tests with GG_BUILD_LOW_PERF=1 since Windows MSYS2 UCRT64 currently fails to create
234+
# a valid python environment for testing
235+
LLAMA_FATAL_WARNINGS=OFF GG_BUILD_NINJA=1 GG_BUILD_VULKAN=1 GG_BUILD_LOW_PERF=1 ./ci/run.sh ./results/llama.cpp ./mnt/llama.cpp
236+
216237
ggml-ci-intel-openvino-gpu-low-perf:
217238
runs-on: [self-hosted, Linux, Intel, OpenVINO]
218239

.github/workflows/build.yml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -472,6 +472,7 @@ jobs:
472472
cmake -B build -S . \
473473
-DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
474474
-DGGML_HIP_ROCWMMA_FATTN=ON \
475+
-DGPU_TARGETS="gfx1030" \
475476
-DGGML_HIP=ON
476477
cmake --build build --config Release -j $(nproc)
477478
@@ -990,6 +991,7 @@ jobs:
990991
-DROCM_DIR="${env:HIP_PATH}" `
991992
-DGGML_HIP=ON `
992993
-DGGML_HIP_ROCWMMA_FATTN=ON `
994+
-DGPU_TARGETS="gfx1100" `
993995
-DGGML_RPC=ON
994996
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
995997

.github/workflows/docker.yml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -73,10 +73,10 @@ jobs:
7373
{ "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" },
7474
{ "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-arm" },
7575
{ "tag": "cpu", "dockerfile": ".devops/s390x.Dockerfile", "platforms": "linux/s390x", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-s390x" },
76-
{ "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
77-
{ "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
78-
{ "tag": "cuda13", "dockerfile": ".devops/cuda-new.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
79-
{ "tag": "cuda13", "dockerfile": ".devops/cuda-new.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
76+
{ "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "12.9.1", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
77+
{ "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "12.9.1", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
78+
{ "tag": "cuda13", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "13.1.1", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
79+
{ "tag": "cuda13", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "13.1.1", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
8080
{ "tag": "musa", "dockerfile": ".devops/musa.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
8181
{ "tag": "intel", "dockerfile": ".devops/intel.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
8282
{ "tag": "vulkan", "dockerfile": ".devops/vulkan.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" },

.github/workflows/hip-quality-check.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ jobs:
5959
run: |
6060
cmake -B build -S . \
6161
-DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
62-
-DGPU_TARGETS=gfx908 \
62+
-DGPU_TARGETS=gfx942 \
6363
-DGGML_HIP=ON \
6464
-DGGML_HIP_EXPORT_METRICS=Off \
6565
-DCMAKE_HIP_FLAGS="-Werror -Wno-tautological-compare" \

ci/run.sh

Lines changed: 35 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,11 @@ if [ ! -z ${GG_BUILD_VULKAN} ]; then
119119
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=OFF -DGGML_BLAS=OFF"
120120
fi
121121

122+
# Build shared libs on Windows
123+
# to reduce binary size and avoid errors in library loading unit tests
124+
if uname -s | grep -qi nt; then
125+
CMAKE_EXTRA="${CMAKE_EXTRA} -DBUILD_SHARED_LIBS=ON"
126+
fi
122127
fi
123128

124129
if [ ! -z ${GG_BUILD_WEBGPU} ]; then
@@ -221,7 +226,7 @@ function gg_run_ctest_debug {
221226

222227
set -e
223228

224-
# Check cmake and ctest are installed
229+
# Check required binaries are installed
225230
gg_check_build_requirements
226231

227232
(cmake -G "${CMAKE_GENERATOR}" -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
@@ -252,7 +257,7 @@ function gg_run_ctest_release {
252257

253258
set -e
254259

255-
# Check cmake and ctest are installed
260+
# Check required binaries are installed
256261
gg_check_build_requirements
257262

258263
(cmake -G "${CMAKE_GENERATOR}" -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
@@ -627,10 +632,38 @@ function gg_sum_rerank_tiny {
627632
}
628633

629634
function gg_check_build_requirements {
635+
if ! command -v git &> /dev/null; then
636+
gg_printf 'git not found, please install'
637+
fi
638+
639+
if ! command -v git-lfs &> /dev/null; then
640+
gg_printf 'git-lfs not found, please install'
641+
fi
642+
643+
if ! command -v wget &> /dev/null; then
644+
gg_printf 'wget not found, please install'
645+
fi
646+
647+
if ! command -v python3 &> /dev/null; then
648+
gg_printf 'python3 not found, please install'
649+
fi
650+
651+
if ! command -v pip3 &> /dev/null; then
652+
gg_printf 'pip3 not found, please install'
653+
fi
654+
655+
if ! python3 -m ensurepip --help &> /dev/null; then
656+
gg_printf 'ensurepip not found, please install python3-venv package'
657+
fi
658+
630659
if ! command -v cmake &> /dev/null; then
631660
gg_printf 'cmake not found, please install'
632661
fi
633662

663+
if ! command -v ccache &> /dev/null; then
664+
gg_printf 'ccache not found, please consider installing for faster builds'
665+
fi
666+
634667
if ! command -v ctest &> /dev/null; then
635668
gg_printf 'ctest not found, please install'
636669
fi

common/arg.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1311,6 +1311,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
13111311
params.kv_unified = value;
13121312
}
13131313
).set_env("LLAMA_ARG_KV_UNIFIED").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_PERPLEXITY, LLAMA_EXAMPLE_BATCHED, LLAMA_EXAMPLE_BENCH, LLAMA_EXAMPLE_PARALLEL}));
1314+
add_opt(common_arg(
1315+
{"--clear-idle"},
1316+
{"--no-clear-idle"},
1317+
"save and clear idle slots on new task (default: enabled, requires unified KV and cache-ram)",
1318+
[](common_params & params, bool value) {
1319+
params.clear_idle = value;
1320+
}
1321+
).set_env("LLAMA_ARG_CLEAR_IDLE").set_examples({LLAMA_EXAMPLE_SERVER}));
13141322
add_opt(common_arg(
13151323
{"--context-shift"},
13161324
{"--no-context-shift"},

0 commit comments

Comments
 (0)