From b094f57ef5bea483cfa6cc4ea2ff617b0a692c6d Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Tue, 23 Jul 2024 11:37:11 +0200 Subject: [PATCH 1/7] introduce basic spirv support --- platforms/artic/intrinsics_thorin.impala | 1 + src/opencl_platform.cpp | 38 ++++++++++++++++-------- src/opencl_platform.h | 1 + 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/platforms/artic/intrinsics_thorin.impala b/platforms/artic/intrinsics_thorin.impala index 93d28e60..d174d82d 100644 --- a/platforms/artic/intrinsics_thorin.impala +++ b/platforms/artic/intrinsics_thorin.impala @@ -15,6 +15,7 @@ #[import(cc = "thorin")] fn cuda(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); #[import(cc = "thorin")] fn nvvm(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); #[import(cc = "thorin")] fn opencl(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); +#[import(cc = "thorin")] fn opencl_spirv(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); #[import(cc = "thorin")] fn amdgpu_hsa(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); #[import(cc = "thorin")] fn amdgpu_pal(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); #[import(cc = "thorin")] fn reserve_shared[T](_size: i32) -> &mut addrspace(3)[T]; diff --git a/src/opencl_platform.cpp b/src/opencl_platform.cpp index 2b9d59ec..fe61c0d0 100644 --- a/src/opencl_platform.cpp +++ b/src/opencl_platform.cpp @@ -515,6 +515,17 @@ cl_program OpenCLPlatform::load_program_binary(DeviceId dev, const std::string& return program; } +cl_program OpenCLPlatform::load_program_il(DeviceId dev, const std::string& filename, const std::string& program_string) const { + const size_t program_length = program_string.length(); + const char* program_c_str = program_string.c_str(); + cl_int err = CL_SUCCESS; + cl_program program = clCreateProgramWithIL(devices_[dev].ctx, (const void*)program_c_str, program_length, &err); + CHECK_OPENCL(err, "clCreateProgramWithIL()"); + debug("Loading IL '%' for OpenCL device %", filename, dev); + + return program; +} + cl_program OpenCLPlatform::load_program_source(DeviceId dev, const std::string& filename, const std::string& program_string) const { const size_t program_length = program_string.length(); const char* program_c_str = program_string.c_str(); @@ -589,25 +600,28 @@ cl_kernel OpenCLPlatform::load_kernel(DeviceId dev, const std::string& filename, if (prog_it == prog_cache.end()) { opencl_dev.unlock(); - if (canonical.extension() != ".cl") - error("Incorrect extension for kernel file '%' (should be '.cl')", canonical.string()); - // load file from disk or cache auto src_path = canonical; if (opencl_dev.is_intel_fpga) src_path.replace_extension(".aocx"); std::string src_code = runtime_->load_file(src_path.string()); - // compile src or load from cache - std::string bin = opencl_dev.is_intel_fpga ? src_code : runtime_->load_from_cache(devices_[dev].platform_name + devices_[dev].device_name + src_code); - if (bin.empty()) { - program = load_program_source(dev, src_path.string(), src_code); - program = compile_program(dev, program, src_path.string()); - runtime_->store_to_cache(devices_[dev].platform_name + devices_[dev].device_name + src_code, program_as_string(program)); - } else { - program = load_program_binary(dev, src_path.string(), bin); + if (canonical.extension() == ".spv") { + program = load_program_il(dev, src_path.string(), src_code); program = compile_program(dev, program, src_path.string()); - } + } else if (canonical.extension() == ".cl") { + // compile src or load from cache + std::string bin = opencl_dev.is_intel_fpga ? src_code : runtime_->load_from_cache(devices_[dev].platform_name + devices_[dev].device_name + src_code); + if (bin.empty()) { + program = load_program_source(dev, src_path.string(), src_code); + program = compile_program(dev, program, src_path.string()); + runtime_->store_to_cache(devices_[dev].platform_name + devices_[dev].device_name + src_code, program_as_string(program)); + } else { + program = load_program_binary(dev, src_path.string(), bin); + program = compile_program(dev, program, src_path.string()); + } + } else + error("Incorrect extension for kernel file '%' (should be '.cl' or .'spv')", canonical.string()); opencl_dev.lock(); prog_cache[canonical.string()] = program; diff --git a/src/opencl_platform.h b/src/opencl_platform.h index 6f9d6c37..96b07f45 100644 --- a/src/opencl_platform.h +++ b/src/opencl_platform.h @@ -107,6 +107,7 @@ class OpenCLPlatform : public Platform { cl_kernel load_kernel(DeviceId dev, const std::string& filename, const std::string& kernelname); cl_program load_program_binary(DeviceId dev, const std::string& filename, const std::string& program_string) const; + cl_program load_program_il(DeviceId dev, const std::string& filename, const std::string& program_string) const; cl_program load_program_source(DeviceId dev, const std::string& filename, const std::string& program_string) const; cl_program compile_program(DeviceId dev, cl_program program, const std::string& filename) const; From e42a9d940851ca3eeb86fd8f7ec5c3343ca21447 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Fri, 8 Nov 2024 12:01:06 +0100 Subject: [PATCH 2/7] added spirv intrinsics file --- cmake/anydsl_runtime-config.cmake.in | 1 + platforms/artic/intrinsics_spirv.impala | 1 + src/CMakeLists.txt | 1 + 3 files changed, 3 insertions(+) create mode 100644 platforms/artic/intrinsics_spirv.impala diff --git a/cmake/anydsl_runtime-config.cmake.in b/cmake/anydsl_runtime-config.cmake.in index 0414fcbe..65045672 100644 --- a/cmake/anydsl_runtime-config.cmake.in +++ b/cmake/anydsl_runtime-config.cmake.in @@ -273,6 +273,7 @@ function(anydsl_runtime_wrap outfiles) ${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_nvvm.impala ${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_amdgpu.impala ${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_opencl.impala + ${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_spirv.impala ${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/intrinsics_thorin.impala ${AnyDSL_runtime_ROOT_DIR}/platforms/${_frontend}/runtime.impala ${_additional_platform_files}) diff --git a/platforms/artic/intrinsics_spirv.impala b/platforms/artic/intrinsics_spirv.impala new file mode 100644 index 00000000..570ff470 --- /dev/null +++ b/platforms/artic/intrinsics_spirv.impala @@ -0,0 +1 @@ +#[import(cc = "device", name = "spirv.builtin")] fn spirv_get_builtin[T](i32) -> T; \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 283eee25..507f1db9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -178,6 +178,7 @@ if(RUNTIME_JIT) ../platforms/${frontend}/intrinsics_nvvm.impala ../platforms/${frontend}/intrinsics_amdgpu.impala ../platforms/${frontend}/intrinsics_opencl.impala + ../platforms/${frontend}/intrinsics_spirv.impala ../platforms/${frontend}/intrinsics_thorin.impala ../platforms/${frontend}/intrinsics.impala ../platforms/${frontend}/runtime.impala) From 90a371da9f30e13debf35c235ce1dbfabb581fe1 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Fri, 8 Nov 2024 15:23:02 +0100 Subject: [PATCH 3/7] added an opencl spirv accelerator --- platforms/artic/intrinsics_opencl.impala | 37 ++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/platforms/artic/intrinsics_opencl.impala b/platforms/artic/intrinsics_opencl.impala index b2369746..ecfbec35 100644 --- a/platforms/artic/intrinsics_opencl.impala +++ b/platforms/artic/intrinsics_opencl.impala @@ -100,6 +100,43 @@ fn @opencl_accelerator(dev: i32) = Accelerator { barrier = @|| opencl_barrier(CLK_LOCAL_MEM_FENCE), }; +fn spv_cl_get_num_groups() = *spirv_get_builtin[&mut addrspace(8) simd[u64 * 3]](24 /* BuiltInNumWorkgroups */); +fn spv_cl_get_local_size() = *spirv_get_builtin[&mut addrspace(8) simd[u64 * 3]](25 /* BuiltInWorkgroupSize */); +fn spv_cl_get_group_id() = *spirv_get_builtin[&mut addrspace(8) simd[u64 * 3]](26 /* BuiltInWorkgroupId */); +fn spv_cl_get_local_id() = *spirv_get_builtin[&mut addrspace(8) simd[u64 * 3]](27 /* BuiltInLocalInvocationId */); +fn spv_cl_get_global_id() = *spirv_get_builtin[&mut addrspace(8) simd[u64 * 3]](28 /* BuiltInGlobalInvocationId */); +fn spv_cl_get_global_size() = *spirv_get_builtin[&mut addrspace(8) simd[u64 * 3]](31 /* BuiltInGlobalSize */); + +fn @opencl_spirv_accelerator(dev: i32) = Accelerator { + exec = @|body| |grid, block| { + let work_item = WorkItem { + tidx = @|| spv_cl_get_local_id()(0) as i32, + tidy = @|| spv_cl_get_local_id()(1) as i32, + tidz = @|| spv_cl_get_local_id()(2) as i32, + bidx = @|| spv_cl_get_local_id()(0) as i32, + bidy = @|| spv_cl_get_group_id()(1) as i32, + bidz = @|| spv_cl_get_group_id()(2) as i32, + gidx = @|| spv_cl_get_global_id()(0) as i32, + gidy = @|| spv_cl_get_global_id()(1) as i32, + gidz = @|| spv_cl_get_global_id()(2) as i32, + bdimx = @|| spv_cl_get_local_size()(0) as i32, + bdimy = @|| spv_cl_get_local_size()(1) as i32, + bdimz = @|| spv_cl_get_local_size()(2) as i32, + gdimx = @|| spv_cl_get_global_size()(0) as i32, + gdimy = @|| spv_cl_get_global_size()(1) as i32, + gdimz = @|| spv_cl_get_global_size()(2) as i32, + nblkx = @|| spv_cl_get_num_groups()(0) as i32, + nblky = @|| spv_cl_get_num_groups()(1) as i32, + nblkz = @|| spv_cl_get_num_groups()(2) as i32 + }; + opencl_spirv(dev, grid, block, || @body(work_item)) + }, + sync = @|| synchronize_opencl(dev), + alloc = @|size| alloc_opencl(dev, size), + alloc_unified = @|size| alloc_opencl_unified(dev, size), + barrier = @|| opencl_barrier(CLK_LOCAL_MEM_FENCE), +}; + static opencl_intrinsics = Intrinsics { expf = opencl_expf, exp2f = opencl_exp2f, From ae4a4faecb143e6918deaacb8c912a0746e66750 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Fri, 8 Nov 2024 21:53:05 +0100 Subject: [PATCH 4/7] added global add atomic intrinsic for CL --- platforms/artic/intrinsics_opencl.impala | 1 + 1 file changed, 1 insertion(+) diff --git a/platforms/artic/intrinsics_opencl.impala b/platforms/artic/intrinsics_opencl.impala index ecfbec35..0a466111 100644 --- a/platforms/artic/intrinsics_opencl.impala +++ b/platforms/artic/intrinsics_opencl.impala @@ -55,6 +55,7 @@ #[import(cc = "device", name = "min")] fn opencl_min(i32, i32) -> i32; #[import(cc = "device", name = "max")] fn opencl_max(i32, i32) -> i32; #[import(cc = "device", name = "atomic_add")] fn opencl_atomic_add_global(&mut addrspace(1)i32, i32) -> i32; +#[import(cc = "device", name = "atomic_add")] fn opencl_atomic_add_global_f32(&mut addrspace(1)f32, f32) -> f32; #[import(cc = "device", name = "atomic_add")] fn opencl_atomic_add_shared(&mut addrspace(3)i32, i32) -> i32; #[import(cc = "device", name = "atomic_min")] fn opencl_atomic_min_global(&mut addrspace(1)i32, i32) -> i32; #[import(cc = "device", name = "atomic_min")] fn opencl_atomic_min_shared(&mut addrspace(3)i32, i32) -> i32; From bf49830f1f668506fa62905f1ddfe121810fe40b Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Wed, 13 Nov 2024 15:00:35 +0100 Subject: [PATCH 5/7] cl: simplify management of scratch buffers --- src/opencl_platform.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/opencl_platform.cpp b/src/opencl_platform.cpp index fe61c0d0..7f43dc19 100644 --- a/src/opencl_platform.cpp +++ b/src/opencl_platform.cpp @@ -366,7 +366,7 @@ void OpenCLPlatform::launch_kernel(DeviceId dev, const LaunchParams& launch_para auto kernel = load_kernel(dev, launch_params.file_name, launch_params.kernel_name); // set up arguments - std::vector kernel_structs(launch_params.num_args); + std::vector kernel_structs; for (uint32_t i = 0; i < launch_params.num_args; i++) { if (launch_params.args.types[i] == KernelArgType::Struct) { // create a buffer for each structure argument @@ -374,8 +374,8 @@ void OpenCLPlatform::launch_kernel(DeviceId dev, const LaunchParams& launch_para cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; cl_mem struct_buf = clCreateBuffer(devices_[dev].ctx, flags, launch_params.args.sizes[i], launch_params.args.data[i], &err); CHECK_OPENCL(err, "clCreateBuffer()"); - kernel_structs[i] = struct_buf; - clSetKernelArg(kernel, i, sizeof(cl_mem), &kernel_structs[i]); + kernel_structs.push_back(struct_buf); + clSetKernelArg(kernel, i, sizeof(cl_mem), &struct_buf); } else { #ifdef CL_VERSION_2_0 if (launch_params.args.types[i] == KernelArgType::Ptr && devices_[dev].version_major == 2) { @@ -421,11 +421,9 @@ void OpenCLPlatform::launch_kernel(DeviceId dev, const LaunchParams& launch_para dynamic_profile(dev, launch_params.file_name); // release temporary buffers for struct arguments - for (uint32_t i = 0; i < launch_params.num_args; i++) { - if (launch_params.args.types[i] == KernelArgType::Struct) { - cl_int err = clReleaseMemObject(kernel_structs[i]); - CHECK_OPENCL(err, "clReleaseMemObject()"); - } + for (auto tmp : kernel_structs) { + cl_int err = clReleaseMemObject(tmp); + CHECK_OPENCL(err, "clReleaseMemObject()"); } } From 12ac86382568bbe90d1955e85c261904736bb5d2 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Fri, 15 Nov 2024 13:08:44 +0100 Subject: [PATCH 6/7] guard OpenCL spir-v support against version requirements --- src/opencl_platform.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/opencl_platform.cpp b/src/opencl_platform.cpp index 7f43dc19..3136045c 100644 --- a/src/opencl_platform.cpp +++ b/src/opencl_platform.cpp @@ -514,6 +514,7 @@ cl_program OpenCLPlatform::load_program_binary(DeviceId dev, const std::string& } cl_program OpenCLPlatform::load_program_il(DeviceId dev, const std::string& filename, const std::string& program_string) const { +#if CL_VERSION_2_1 const size_t program_length = program_string.length(); const char* program_c_str = program_string.c_str(); cl_int err = CL_SUCCESS; @@ -522,6 +523,9 @@ cl_program OpenCLPlatform::load_program_il(DeviceId dev, const std::string& file debug("Loading IL '%' for OpenCL device %", filename, dev); return program; +#else + error("OpenCL 2.1 or later is required for SPIR-V support."); +#endif } cl_program OpenCLPlatform::load_program_source(DeviceId dev, const std::string& filename, const std::string& program_string) const { From 078dcaacd49fa949ac5ffefe699ec8ec64fd2240 Mon Sep 17 00:00:00 2001 From: Hugo Devillers Date: Fri, 22 Nov 2024 13:36:39 +0100 Subject: [PATCH 7/7] use different struct argument passing for spir-v kernels --- src/opencl_platform.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/opencl_platform.cpp b/src/opencl_platform.cpp index 3136045c..b26b3630 100644 --- a/src/opencl_platform.cpp +++ b/src/opencl_platform.cpp @@ -357,6 +357,12 @@ void time_kernel_callback(cl_event event, cl_int, void* data) { CHECK_OPENCL(err, "clReleaseEvent()"); } +static inline bool ends_with(std::string_view str, std::string_view suffix) { + if (str.size() < suffix.size()) + return false; + return str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; +} + void OpenCLPlatform::launch_kernel(DeviceId dev, const LaunchParams& launch_params) { if (devices_[dev].is_intel_fpga && launch_params.num_args == 0) { debug("processing by autorun kernel"); @@ -364,11 +370,12 @@ void OpenCLPlatform::launch_kernel(DeviceId dev, const LaunchParams& launch_para } auto kernel = load_kernel(dev, launch_params.file_name, launch_params.kernel_name); + bool is_spirv = ends_with(launch_params.file_name, ".spv"); // set up arguments std::vector kernel_structs; for (uint32_t i = 0; i < launch_params.num_args; i++) { - if (launch_params.args.types[i] == KernelArgType::Struct) { + if (!is_spirv && launch_params.args.types[i] == KernelArgType::Struct) { // create a buffer for each structure argument cl_int err = CL_SUCCESS; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR;