Skip to content

Commit 1c5633f

Browse files
committed
Merge remote-tracking branch 'contributor/feat/glm4-mtp-recursive' into glm4-moe-mtp
merge PR to sync latest additions (polished UX and recursive drafting) and rebase upstream
2 parents c2d7c76 + 7d782ab commit 1c5633f

File tree

1,511 files changed

+406245
-116258
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,511 files changed

+406245
-116258
lines changed

.clang-format

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,14 @@ AllowShortIfStatementsOnASingleLine: Never
2222
AllowShortLambdasOnASingleLine: Inline
2323
AllowShortLoopsOnASingleLine: false
2424
AlwaysBreakBeforeMultilineStrings: true
25-
BinPackArguments: false
25+
# Treat CUDA keywords/attributes as "attribute macros" and avoid breaking lines inside them
26+
AttributeMacros:
27+
- __host__
28+
- __device__
29+
- __global__
30+
- __forceinline__
31+
- __launch_bounds__
32+
BinPackArguments: true
2633
BinPackParameters: false # OnePerLine
2734
BitFieldColonSpacing: Both
2835
BreakBeforeBraces: Custom # Attach

.clang-tidy

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Checks: >
1717
clang-analyzer-*,
1818
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
1919
performance-*,
20+
-performance-enum-size,
2021
portability-*,
2122
-portability-simd-intrinsics,
2223
misc-*,

.devops/cann.Dockerfile

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,15 @@
33
# ==============================================================================
44

55
# Define the CANN base image for easier version updates later
6-
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.1.rc1-910b-openeuler22.03-py3.10
6+
ARG CHIP_TYPE=910b
7+
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc1.alpha001-${CHIP_TYPE}-openeuler22.03-py3.11
78

89
# ==============================================================================
910
# BUILD STAGE
1011
# Compile all binary files and libraries
1112
# ==============================================================================
1213
FROM ${CANN_BASE_IMAGE} AS build
1314

14-
# Define the Ascend chip model for compilation. Default is Ascend910B3
15-
ARG ASCEND_SOC_TYPE=Ascend910B3
16-
1715
# -- Install build dependencies --
1816
RUN yum install -y gcc g++ cmake make git libcurl-devel python3 python3-pip && \
1917
yum clean all && \
@@ -36,20 +34,21 @@ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
3634
# For brevity, only core variables are listed here. You can paste the original ENV list here.
3735

3836
# -- Build llama.cpp --
39-
# Use the passed ASCEND_SOC_TYPE argument and add general build options
37+
# Use the passed CHIP_TYPE argument and add general build options
38+
ARG CHIP_TYPE
4039
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force \
4140
&& \
4241
cmake -B build \
4342
-DGGML_CANN=ON \
4443
-DCMAKE_BUILD_TYPE=Release \
45-
-DSOC_TYPE=${ASCEND_SOC_TYPE} \
44+
-DSOC_TYPE=ascend${CHIP_TYPE} \
4645
. && \
4746
cmake --build build --config Release -j$(nproc)
4847

4948
# -- Organize build artifacts for copying in later stages --
5049
# Create a lib directory to store all .so files
5150
RUN mkdir -p /app/lib && \
52-
find build -name "*.so" -exec cp {} /app/lib \;
51+
find build -name "*.so*" -exec cp -P {} /app/lib \;
5352

5453
# Create a full directory to store all executables and Python scripts
5554
RUN mkdir -p /app/full && \

.devops/cloud-v-pipeline

Lines changed: 0 additions & 22 deletions
This file was deleted.

.devops/cpu.Dockerfile

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,27 +4,23 @@ FROM ubuntu:$UBUNTU_VERSION AS build
44

55
ARG TARGETARCH
66

7-
ARG GGML_CPU_ARM_ARCH=armv8-a
8-
97
RUN apt-get update && \
108
apt-get install -y build-essential git cmake libcurl4-openssl-dev
119

1210
WORKDIR /app
1311

1412
COPY . .
1513

16-
RUN if [ "$TARGETARCH" = "amd64" ]; then \
14+
RUN if [ "$TARGETARCH" = "amd64" ] || [ "$TARGETARCH" = "arm64" ]; then \
1715
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
18-
elif [ "$TARGETARCH" = "arm64" ]; then \
19-
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
2016
else \
2117
echo "Unsupported architecture"; \
2218
exit 1; \
2319
fi && \
2420
cmake --build build -j $(nproc)
2521

2622
RUN mkdir -p /app/lib && \
27-
find build -name "*.so" -exec cp {} /app/lib \;
23+
find build -name "*.so*" -exec cp -P {} /app/lib \;
2824

2925
RUN mkdir -p /app/full \
3026
&& cp build/bin/* /app/full \

.devops/cuda.Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
2525
cmake --build build --config Release -j$(nproc)
2626

2727
RUN mkdir -p /app/lib && \
28-
find build -name "*.so" -exec cp {} /app/lib \;
28+
find build -name "*.so*" -exec cp -P {} /app/lib \;
2929

3030
RUN mkdir -p /app/full \
3131
&& cp build/bin/* /app/full \
@@ -61,7 +61,7 @@ RUN apt-get update \
6161
python3 \
6262
python3-pip \
6363
&& pip install --upgrade pip setuptools wheel \
64-
&& pip install -r requirements.txt \
64+
&& pip install --break-system-packages -r requirements.txt \
6565
&& apt autoremove -y \
6666
&& apt clean -y \
6767
&& rm -rf /tmp/* /var/tmp/* \

.devops/intel.Dockerfile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
ARG ONEAPI_VERSION=2025.1.1-0-devel-ubuntu24.04
1+
ARG ONEAPI_VERSION=2025.2.2-0-devel-ubuntu24.04
22

33
## Build Image
44

5-
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
5+
FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS build
66

77
ARG GGML_SYCL_F16=OFF
88
RUN apt-get update && \
@@ -21,7 +21,7 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
2121
cmake --build build --config Release -j$(nproc)
2222

2323
RUN mkdir -p /app/lib && \
24-
find build -name "*.so" -exec cp {} /app/lib \;
24+
find build -name "*.so*" -exec cp -P {} /app/lib \;
2525

2626
RUN mkdir -p /app/full \
2727
&& cp build/bin/* /app/full \
@@ -31,7 +31,7 @@ RUN mkdir -p /app/full \
3131
&& cp requirements.txt /app/full \
3232
&& cp .devops/tools.sh /app/full/tools.sh
3333

34-
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS base
34+
FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS base
3535

3636
RUN apt-get update \
3737
&& apt-get install -y libgomp1 curl\

.devops/musa.Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
ARG UBUNTU_VERSION=22.04
22
# This needs to generally match the container host's environment.
3-
ARG MUSA_VERSION=rc4.2.0
3+
ARG MUSA_VERSION=rc4.3.0
44
# Target the MUSA build image
55
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64
66

@@ -32,7 +32,7 @@ RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
3232
cmake --build build --config Release -j$(nproc)
3333

3434
RUN mkdir -p /app/lib && \
35-
find build -name "*.so" -exec cp {} /app/lib \;
35+
find build -name "*.so*" -exec cp -P {} /app/lib \;
3636

3737
RUN mkdir -p /app/full \
3838
&& cp build/bin/* /app/full \

.devops/nix/package.nix

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
3535
enableCurl ? true,
3636
useVulkan ? false,
37+
useRpc ? false,
3738
llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
3839

3940
# It's necessary to consistently use backendStdenv when building with CUDA support,
@@ -128,10 +129,6 @@ effectiveStdenv.mkDerivation (finalAttrs: {
128129
};
129130

130131
postPatch = ''
131-
substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
132-
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
133-
substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
134-
--replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
135132
'';
136133

137134
# With PR#6015 https://github.com/ggml-org/llama.cpp/pull/6015,
@@ -179,6 +176,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
179176
(cmakeBool "GGML_METAL" useMetalKit)
180177
(cmakeBool "GGML_VULKAN" useVulkan)
181178
(cmakeBool "GGML_STATIC" enableStatic)
179+
(cmakeBool "GGML_RPC" useRpc)
182180
]
183181
++ optionals useCuda [
184182
(

.devops/rocm.Dockerfile

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
ARG UBUNTU_VERSION=24.04
22

33
# This needs to generally match the container host's environment.
4-
ARG ROCM_VERSION=6.4
5-
ARG AMDGPU_VERSION=6.4
4+
ARG ROCM_VERSION=7.0
5+
ARG AMDGPU_VERSION=7.0
66

7-
# Target the CUDA build image
7+
# Target the ROCm build image
88
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
99

1010
### Build image
@@ -13,18 +13,14 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
1313
# Unless otherwise specified, we make a fat build.
1414
# List from https://github.com/ggml-org/llama.cpp/pull/1087#issuecomment-1682807878
1515
# This is mostly tied to rocBLAS supported archs.
16-
# gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported
17-
# gfx906 is deprecated
18-
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html
16+
# gfx803, gfx900, gfx906, gfx1032, gfx1101, gfx1102,not officialy supported
17+
# check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html
1918

20-
ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
21-
#ARG ROCM_DOCKER_ARCH=gfx1100
19+
ARG ROCM_DOCKER_ARCH='gfx803;gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1010;gfx1030;gfx1032;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx1151'
20+
#ARG ROCM_DOCKER_ARCH='gfx1151'
2221

23-
# Set nvcc architectured
22+
# Set ROCm architectures
2423
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
25-
# Enable ROCm
26-
# ENV CC=/opt/rocm/llvm/bin/clang
27-
# ENV CXX=/opt/rocm/llvm/bin/clang++
2824

2925
RUN apt-get update \
3026
&& apt-get install -y \
@@ -40,11 +36,16 @@ WORKDIR /app
4036
COPY . .
4137

4238
RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
43-
cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
39+
cmake -S . -B build \
40+
-DGGML_HIP=ON \
41+
-DGGML_HIP_ROCWMMA_FATTN=ON \
42+
-DAMDGPU_TARGETS="$ROCM_DOCKER_ARCH" \
43+
-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON \
44+
-DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
4445
&& cmake --build build --config Release -j$(nproc)
4546

4647
RUN mkdir -p /app/lib \
47-
&& find build -name "*.so" -exec cp {} /app/lib \;
48+
&& find build -name "*.so*" -exec cp -P {} /app/lib \;
4849

4950
RUN mkdir -p /app/full \
5051
&& cp build/bin/* /app/full \

0 commit comments

Comments
 (0)