Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/scripts/build-rocm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ if [ "${build_os:0:6}" == ubuntu ]; then
-w /src -v "$PWD:/src" "$image" sh -c \
"apt-get update \
&& pip install cmake==3.31.6 \
&& cmake -DCOMPUTE_BACKEND=hip -DCMAKE_BUILD_TYPE=MinSizeRel -DCMAKE_HIP_FLAGS=\"--offload-compress\" -DBNB_ROCM_ARCH=\"${bnb_rocm_arch}\" . \
&& cmake -DCOMPUTE_BACKEND=rocm -DCMAKE_BUILD_TYPE=MinSizeRel -DCMAKE_HIP_FLAGS=\"--offload-compress\" -DBNB_ROCM_ARCH=\"${bnb_rocm_arch}\" . \
&& cmake --build ."
fi

Expand Down
125 changes: 99 additions & 26 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# For GCC: `cmake -B build . && cmake --build build`
# For MSVC: `cmake -B build . && cmake --build build --config Release`
# You can also use the following options and variables
# - COMPUTE_BACKEND: Set to `cpu`, `cuda`, or `mps` to select the backend
# - COMPUTE_BACKEND: Set to `cpu`, `cuda`, `rocm` or `mps` to select the backend
# - CUDA_VERSION: The expected CUDA version, for sanity checking. The actual version
# is whatever CMake finds on your path.
# - COMPUTE_CAPABILITY: Which GPU Arch/Compute codes to provide to NVCC.
Expand All @@ -16,8 +16,8 @@
# libbitsandbytes_rocm70.so even if the system has ROCm 7.2.
cmake_minimum_required(VERSION 3.22.1)

# On Windows with HIP backend, auto-detect compilers from ROCM_PATH before project()
if(WIN32 AND COMPUTE_BACKEND STREQUAL "hip")
# On Windows with ROCm backend, auto-detect compilers from ROCM_PATH before project()
if(WIN32 AND COMPUTE_BACKEND STREQUAL "rocm")
if(DEFINED ENV{ROCM_PATH})
set(ROCM_PATH $ENV{ROCM_PATH})
endif()
Expand Down Expand Up @@ -61,8 +61,8 @@ set(XPU_FILES csrc/xpu_ops.cpp csrc/xpu_kernels.cpp)
# C++ sources are always included
list(APPEND SRC_FILES ${CPP_FILES})

set(COMPUTE_BACKEND "cpu" CACHE STRING "The compute backend to use (cpu, cuda, hip, mps, xpu)")
set_property(CACHE COMPUTE_BACKEND PROPERTY STRINGS cpu cuda hip mps xpu)
set(COMPUTE_BACKEND "cpu" CACHE STRING "The compute backend to use (cpu, cuda, rocm, mps, xpu)")
set_property(CACHE COMPUTE_BACKEND PROPERTY STRINGS cpu cuda rocm mps xpu)
option(PTXAS_VERBOSE "Pass through -v flag to PTX Assembler" OFF)

if(APPLE)
Expand All @@ -78,33 +78,33 @@ if(${COMPUTE_BACKEND} STREQUAL "cuda")
message(FATAL_ERROR "CUDA is not supported on macOS" )
endif()
set(BUILD_CUDA ON)
set(BUILD_HIP OFF)
set(BUILD_ROCM OFF)
set(BUILD_MPS OFF)
elseif(${COMPUTE_BACKEND} STREQUAL "hip")
elseif(${COMPUTE_BACKEND} STREQUAL "rocm")
if(APPLE)
message(FATAL_ERROR "HIP is not supported on macOS" )
message(FATAL_ERROR "ROCm is not supported on macOS" )
endif()
set(BUILD_CUDA OFF)
set(BUILD_HIP ON)
set(BUILD_ROCM ON)
set(BUILD_MPS OFF)
elseif(${COMPUTE_BACKEND} STREQUAL "mps")
if(NOT APPLE)
message(FATAL_ERROR "MPS is only supported on macOS" )
endif()
set(BUILD_CUDA OFF)
set(BUILD_HIP OFF)
set(BUILD_ROCM OFF)
set(BUILD_MPS ON)
elseif(${COMPUTE_BACKEND} STREQUAL "xpu")
if(APPLE)
message(FATAL_ERROR "XPU is not supported on macOS" )
endif()
set(BUILD_CUDA OFF)
set(BUILD_HIP OFF)
set(BUILD_ROCM OFF)
set(BUILD_MPS OFF)
set(BUILD_XPU ON)
else()
set(BUILD_CUDA OFF)
set(BUILD_HIP OFF)
set(BUILD_ROCM OFF)
set(BUILD_MPS OFF)
set(BUILD_XPU OFF)
set(BUILD_CPU ON)
Expand Down Expand Up @@ -228,7 +228,7 @@ if(BUILD_CUDA)

string(APPEND BNB_OUTPUT_NAME "_cuda${CUDA_VERSION_SHORT}")
add_compile_definitions(BUILD_CUDA)
elseif(BUILD_HIP)
elseif(BUILD_ROCM)
# Set target architectures before enable_language(HIP), which would otherwise
# auto-detect a single GPU and override the defaults.
if(DEFINED BNB_ROCM_ARCH)
Expand All @@ -247,23 +247,96 @@ elseif(BUILD_HIP)

string(APPEND BNB_OUTPUT_NAME "_rocm")

# get hip version
execute_process(COMMAND hipconfig --version OUTPUT_VARIABLE HIP_CONFIG_VERSION)
string(REGEX MATCH "[0-9]+\\.[0-9]+" HIP_VERSION "${HIP_CONFIG_VERSION}")
string(REPLACE "." "" HIP_VERSION_SHORT "${HIP_VERSION}")
# Always initialize this so downstream version checks are deterministic.
set(_DETECTED_ROCM_VERSION "0.0")
set(_ROCM_VERSION_SHORT "")

# Allow the user to skip all version detection by passing -DROCM_VERSION=<shortcode>
if(DEFINED ROCM_VERSION)
message(STATUS "ROCm Version: ${ROCM_VERSION} (user-supplied via -DROCM_VERSION)")
# Assume user-supplied ROCM_VERSION is a shortcode (e.g. 71).
set(_ROCM_VERSION_SHORT "${ROCM_VERSION}")
string(LENGTH "${ROCM_VERSION}" _ROCM_VERSION_LEN)
if(_ROCM_VERSION_LEN GREATER 1)
math(EXPR _ROCM_MAJOR_LEN "${_ROCM_VERSION_LEN} - 1")
string(SUBSTRING "${ROCM_VERSION}" 0 ${_ROCM_MAJOR_LEN} _ROCM_MAJOR)
string(SUBSTRING "${ROCM_VERSION}" ${_ROCM_MAJOR_LEN} 1 _ROCM_MINOR)
set(_DETECTED_ROCM_VERSION "${_ROCM_MAJOR}.${_ROCM_MINOR}")
else()
message(WARNING
"ROCM_VERSION='${ROCM_VERSION}' looks like a single digit. "
"Expected a two-digit shortcode (e.g. 71 for ROCm 7.1). "
"Interpreting as ${ROCM_VERSION}.0."
)
set(_DETECTED_ROCM_VERSION "${ROCM_VERSION}.0")
endif()
else()
# Detect the actual ROCm version.
# Prefer the .info/version file (the canonical ROCm version) because the
# HIP SDK version diverged from the ROCm version starting with ROCm 7.x
set(_DETECTED_ROCM_VERSION "")

# Resolve the ROCm installation root (same logic used later for find_package)
if(DEFINED ENV{ROCM_PATH})
set(_ROCM_ROOT "$ENV{ROCM_PATH}")
else()
if(WIN32)
message(WARNING
"ROCM_PATH environment variable is not set. "
"On Windows this is the primary way to locate the ROCm installation.\n"
"Falling back to C:/opt/rocm. Set ROCM_PATH if ROCm is installed elsewhere."
)
set(_ROCM_ROOT "C:/opt/rocm")
else()
set(_ROCM_ROOT "/opt/rocm")
endif()
endif()

# Expose a cache variable that the user can set to override the ROCm version in the library name
set(ROCM_VERSION "${HIP_VERSION_SHORT}" CACHE STRING "Expected ROCm Version Shortcode")
# Try <rocm_root>/.info/version
if(_ROCM_ROOT AND EXISTS "${_ROCM_ROOT}/.info/version")
file(READ "${_ROCM_ROOT}/.info/version" _ROCM_INFO_CONTENT)
string(STRIP "${_ROCM_INFO_CONTENT}" _ROCM_INFO_CONTENT)
string(REGEX MATCH "[0-9]+\\.[0-9]+" _DETECTED_ROCM_VERSION "${_ROCM_INFO_CONTENT}")
if(_DETECTED_ROCM_VERSION)
message(STATUS "ROCm Version: ${_DETECTED_ROCM_VERSION} (from ${_ROCM_ROOT}/.info/version)")
endif()
endif()

# Fall back to hipconfig --version (HIP SDK version) for older installs
if(NOT _DETECTED_ROCM_VERSION)
  # Reached only when the .info/version lookup above produced no version.
  # Take the first "<major>.<minor>" found in the output of `hipconfig --version`;
  # if nothing matches, _DETECTED_ROCM_VERSION stays empty and we abort below.
  execute_process(COMMAND hipconfig --version OUTPUT_VARIABLE HIP_CONFIG_VERSION)
  string(REGEX MATCH "[0-9]+\\.[0-9]+" _DETECTED_ROCM_VERSION "${HIP_CONFIG_VERSION}")

  if(_DETECTED_ROCM_VERSION)
    # The build can proceed, but the value comes from the HIP SDK rather than
    # from the ROCm install itself, so tell the user how to override it.
    message(WARNING
      "Could not read ROCm version from ${_ROCM_ROOT}/.info/version; "
      "falling back to hipconfig (${_DETECTED_ROCM_VERSION}).\n"
      "Starting with ROCm 7.x the HIP SDK version diverges from the ROCm version, "
      "which may produce a misnamed library.\n"
      "To fix this you can either:\n"
      " - At build time: cmake -DROCM_VERSION=<shortcode> (e.g. -DROCM_VERSION=71 for ROCm 7.1)\n"
      " - At runtime: export BNB_ROCM_VERSION=<shortcode> (e.g. BNB_ROCM_VERSION=71)"
    )
  else()
    # Neither source yielded a version: stop configuring and list both
    # remedies (fix the install / ROCM_PATH, or supply the version explicitly).
    message(FATAL_ERROR
      "Could not detect the ROCm version.\n"
      "Checked:\n"
      " 1. ${_ROCM_ROOT}/.info/version — not found or not readable\n"
      " 2. hipconfig --version — not found or returned no version\n"
      "Please install ROCm and ensure ROCM_PATH is set correctly, or\n"
      "pass -DROCM_VERSION=<shortcode> (e.g. -DROCM_VERSION=71 for ROCm 7.1) to skip detection."
    )
  endif()
endif()

message(STATUS "ROCm Version: ${HIP_VERSION_SHORT} (from hipconfig)")
if(NOT ROCM_VERSION STREQUAL "${HIP_VERSION_SHORT}")
message(WARNING "Overriding ROCm version in library name: ${HIP_VERSION_SHORT} -> ${ROCM_VERSION}")
string(REPLACE "." "" _DETECTED_ROCM_VERSION_SHORT "${_DETECTED_ROCM_VERSION}")
set(_ROCM_VERSION_SHORT "${_DETECTED_ROCM_VERSION_SHORT}")
set(ROCM_VERSION "${_DETECTED_ROCM_VERSION_SHORT}" CACHE STRING "Expected ROCm Version Shortcode")
endif()

string(APPEND BNB_OUTPUT_NAME "${ROCM_VERSION}")
string(APPEND BNB_OUTPUT_NAME "${_ROCM_VERSION_SHORT}")
add_compile_definitions(__HIP_PLATFORM_AMD__)
add_compile_definitions(__HIP_PLATFORM_HCC__)
add_compile_definitions(BUILD_HIP)
add_compile_definitions(BUILD_ROCM)
elseif(BUILD_MPS)
if(NOT APPLE)
message(FATAL_ERROR "MPS is only supported on macOS" )
Expand Down Expand Up @@ -354,7 +427,7 @@ if(BUILD_CUDA)
CUDA_SEPARABLE_COMPILATION ON
)
endif()
if(BUILD_HIP)
if(BUILD_ROCM)
# Determine ROCM_PATH from environment variable, fallback to /opt/rocm on Linux
if(DEFINED ENV{ROCM_PATH})
set(ROCM_PATH $ENV{ROCM_PATH})
Expand Down Expand Up @@ -391,7 +464,7 @@ if(BUILD_HIP)
set_source_files_properties(${GPU_FILES} PROPERTIES LANGUAGE HIP)
set_target_properties(bitsandbytes PROPERTIES LINKER_LANGUAGE CXX)

if(HIP_VERSION VERSION_LESS "6.1")
if(_DETECTED_ROCM_VERSION VERSION_LESS "6.1")
target_compile_definitions(bitsandbytes PUBLIC NO_HIPBLASLT)
else()
find_package(hipblaslt)
Expand Down
24 changes: 15 additions & 9 deletions bitsandbytes/cextension.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,28 +31,34 @@ def get_cuda_bnb_library_path(cuda_specs: CUDASpecs) -> Path:
prefix = "rocm" if torch.version.hip else "cuda"
library_name = f"libbitsandbytes_{prefix}{cuda_specs.cuda_version_string}{DYNAMIC_LIBRARY_SUFFIX}"

override_value = os.environ.get("BNB_CUDA_VERSION")
cuda_override_value = os.environ.get("BNB_CUDA_VERSION")
rocm_override_value = os.environ.get("BNB_ROCM_VERSION")

if rocm_override_value and torch.version.hip:
if rocm_override_value:
library_name = re.sub(r"rocm\d+", f"rocm{rocm_override_value}", library_name, count=1)
if torch.version.cuda:
raise RuntimeError(
f"BNB_ROCM_VERSION={rocm_override_value} detected for CUDA!\n"
"Use BNB_CUDA_VERSION instead: export BNB_CUDA_VERSION=<version>\n"
"Clear the variable and retry: unset BNB_ROCM_VERSION\n"
)
logger.warning(
f"WARNING: BNB_ROCM_VERSION={rocm_override_value} environment variable detected; loading {library_name}.\n"
"This can be used to load a bitsandbytes version built with a ROCm version that is different from the PyTorch ROCm version.\n"
"If this was unintended set the BNB_ROCM_VERSION variable to an empty string: export BNB_ROCM_VERSION=\n"
"If this was unintended clear the variable and retry: unset BNB_ROCM_VERSION\n"
)
elif override_value:
library_name = re.sub(r"cuda\d+", f"cuda{override_value}", library_name, count=1)
elif cuda_override_value:
library_name = re.sub(r"cuda\d+", f"cuda{cuda_override_value}", library_name, count=1)
if torch.version.hip:
raise RuntimeError(
f"BNB_CUDA_VERSION={override_value} detected for ROCm!! \n"
f"BNB_CUDA_VERSION={cuda_override_value} detected for ROCm!\n"
f"Use BNB_ROCM_VERSION instead: export BNB_ROCM_VERSION=<version>\n"
f"Clear the variable and retry: export BNB_CUDA_VERSION=\n"
f"Clear the variable and retry: unset BNB_CUDA_VERSION\n"
)
logger.warning(
f"WARNING: BNB_CUDA_VERSION={override_value} environment variable detected; loading {library_name}.\n"
f"WARNING: BNB_CUDA_VERSION={cuda_override_value} environment variable detected; loading {library_name}.\n"
"This can be used to load a bitsandbytes version built with a CUDA version that is different from the PyTorch CUDA version.\n"
"If this was unintended set the BNB_CUDA_VERSION variable to an empty string: export BNB_CUDA_VERSION=\n"
"If this was unintended clear the variable and retry: unset BNB_CUDA_VERSION\n"
)

return PACKAGE_DIR / library_name
Expand Down
12 changes: 9 additions & 3 deletions bitsandbytes/cuda_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,18 @@ def get_compute_capabilities() -> list[tuple[int, int]]:

@lru_cache(None)
def get_cuda_version_tuple() -> Optional[tuple[int, int]]:
"""Get CUDA/HIP version as a tuple of (major, minor)."""
"""Get CUDA/ROCm version as a tuple of (major, minor).

For ROCm, prefers ``torch.version.rocm`` (the actual ROCm version)
over ``torch.version.hip`` (the HIP SDK version) because the two
version lines diverged starting with ROCm 7.x. Falls back to
``torch.version.hip`` when the attribute is not yet available.
"""
try:
if torch.version.cuda:
version_str = torch.version.cuda
elif torch.version.hip:
version_str = torch.version.hip
version_str = getattr(torch.version, "rocm", None) or torch.version.hip
else:
return None

Expand All @@ -44,7 +50,7 @@ def get_cuda_version_tuple() -> Optional[tuple[int, int]]:


def get_cuda_version_string() -> Optional[str]:
"""Get CUDA/HIP version as a string."""
"""Get CUDA/ROCm version as a compact string (e.g. ``"120"`` or ``"71"``)."""
version_tuple = get_cuda_version_tuple()
if version_tuple is None:
return None
Expand Down
2 changes: 1 addition & 1 deletion bitsandbytes/diagnostics/cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def _print_hip_runtime_diagnostics() -> None:
f"""
Found duplicate ROCm runtime files (see below).

We select the PyTorch default ROCm runtime, which is {torch.version.hip},
We select the PyTorch default ROCm runtime, which is {getattr(torch.version, "rocm", None) or torch.version.hip},
but this might mismatch with the ROCm version that is needed for bitsandbytes.
To override this behavior set the `BNB_ROCM_VERSION=<version string, e.g. 72>` environmental variable.

Expand Down
1 change: 1 addition & 0 deletions bitsandbytes/diagnostics/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def show_environment():

print(f"PyTorch: {torch.__version__}")
print(f" CUDA: {torch.version.cuda or 'N/A'}")
print(f" ROCm: {getattr(torch.version, 'rocm', 'N/A') or 'N/A'}")
print(f" HIP: {torch.version.hip or 'N/A'}")
print(f" XPU: {getattr(torch.version, 'xpu', 'N/A') or 'N/A'}")

Expand Down
9 changes: 5 additions & 4 deletions csrc/pythonInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
#include <cuda_runtime_api.h>
#include <ops.cuh>
#endif
#if BUILD_HIP
#if BUILD_ROCM
#include <hip/hip_runtime.h>
#include <ops.cuh>
#endif
#if BUILD_MPS
Expand All @@ -19,7 +20,7 @@
#include <cpu_ops.h>

// Compatibility between HIP/CUDA APIs
#if BUILD_HIP
#if BUILD_ROCM
#define cudaStream_t hipStream_t
#define __nv_bfloat16 hip_bfloat16
#define cublasLtHandle_t hipblasLtHandle_t
Expand All @@ -38,7 +39,7 @@
// UNMANGLED CALLS
//===================================================================================

#if BUILD_CUDA || BUILD_HIP
#if BUILD_CUDA || BUILD_ROCM

void gemm_4bit_inference_naive_fp16(
int m, int n, int k, half* A, unsigned char* B, float* absmax, float* datatype, half* out, int lda, int ldb,
Expand Down Expand Up @@ -334,7 +335,7 @@ void gemv_4bit_inference_fp32(
#endif

extern "C" {
#if BUILD_CUDA || BUILD_HIP
#if BUILD_CUDA || BUILD_ROCM

void cdequantize_blockwise_fp16_fp4(
float* code, unsigned char* A, float* absmax, half* out, int blocksize, const int n, cudaStream_t stream
Expand Down
2 changes: 1 addition & 1 deletion docs/source/installation.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ git clone https://github.com/bitsandbytes-foundation/bitsandbytes.git && cd bits

# Compile & install
apt-get install -y build-essential cmake # install build tools dependencies, unless present
cmake -DCOMPUTE_BACKEND=hip -S . # Use -DBNB_ROCM_ARCH="gfx90a;gfx942" to target specific gpu arch
cmake -DCOMPUTE_BACKEND=rocm -S . # Use -DBNB_ROCM_ARCH="gfx90a;gfx942" to target specific gpu arch
make
pip install -e . # `-e` for "editable" install, when developing BNB (otherwise leave that out)
```
Expand Down
Loading