From a3e3cd1b4e9b66ba2f256b96831038cfa06f123f Mon Sep 17 00:00:00 2001 From: Stefan Frijters Date: Mon, 15 Aug 2022 19:01:50 +0200 Subject: [PATCH 01/23] [OKL] Extend allowed loop index types to include `size_t` and `ptrdiff_t` (#614) --- src/occa/internal/lang/modes/oklForStatement.cpp | 8 +++++--- tests/src/loops/forLoop.cpp | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/occa/internal/lang/modes/oklForStatement.cpp b/src/occa/internal/lang/modes/oklForStatement.cpp index d2076a670..0f40e31ba 100644 --- a/src/occa/internal/lang/modes/oklForStatement.cpp +++ b/src/occa/internal/lang/modes/oklForStatement.cpp @@ -96,15 +96,17 @@ namespace occa { variableDeclaration &decl = declSmnt.declarations[0]; iterator = &decl.variable(); initValue = decl.value; - // Valid types: {char, short, int, long} + // Valid types: {char, short, int, long, ptrdiff_t, size_t} const type_t *type = iterator->vartype.flatten().type; if (!type || ((*type != char_) && (*type != short_) && - (*type != int_))) { + (*type != int_) && + (*type != ptrdiff_t_) && + (*type != size_t_))) { if (printErrors) { iterator->printError(sourceStr() + "Iterator variable needs to be of type" - " [char, short, int, long]"); + " [char, short, int, long, ptrdiff_t, size_t]"); } return false; } diff --git a/tests/src/loops/forLoop.cpp b/tests/src/loops/forLoop.cpp index 735ed5e05..4b6a2418f 100644 --- a/tests/src/loops/forLoop.cpp +++ b/tests/src/loops/forLoop.cpp @@ -52,7 +52,7 @@ void testOuterForLoops(occa::device device) { .outer(length) .run(OCCA_FUNCTION(scope, [=](const int outerIndex) -> void { OKL("@inner"); - for (int i = 0; i < 2; ++i) { + for (long i = 0; i < 2; ++i) { const int globalIndex = i + (2 * outerIndex); output[globalIndex] = globalIndex; } @@ -86,7 +86,7 @@ void testOuterForLoops(occa::device device) { .outer(length, occa::range(length), indexArray) .run(OCCA_FUNCTION(scope, [=](const int3 outerIndex) -> void { OKL("@inner"); - for (int i = 0; i < 2; ++i) { + for (size_t i = 0; i < 2; ++i) { const int globalIndex = ( i + (2 * (outerIndex.z + length * (outerIndex.y + length * outerIndex.x))) ); From 7e5fd1cf155917c8db00ab2318ca1e1a36d925c3 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Tue, 16 Aug 2022 11:46:57 -0500 Subject: [PATCH 02/23] Fix dpcpp warnings (#617) * Update the namespace for features accepted into the SYCL 2020 spec. * Replace deprecated barrier calls with `group_barrier`. --- .github/workflows/build.yml | 4 ++-- src/occa/internal/lang/expr/dpcppAtomicNode.cpp | 10 +++++----- src/occa/internal/lang/modes/dpcpp.cpp | 4 +--- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a7efb9d5a..51c6aae35 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -165,7 +165,7 @@ jobs: - name: Run CTests if: ${{ matrix.useCMake && !matrix.useoneAPI }} run: | - ctest --test-dir build --progress --output-on-failure --parallel 8 --schedule-random -E "examples_cpp_arrays-opencl|examples_cpp_for_loops-opencl|examples_cpp_generic_inline_kernel-opencl|examples_cpp_shared_memory-opencl|examples_cpp_nonblocking_streams-opencl|examples_cpp_shared_memory-dpcpp|examples_cpp_nonblocking_streams-dpcpp|examples_cpp_for_loops-dpcpp|examples_cpp_arrays-dpcpp" + ctest --test-dir build --progress --output-on-failure --parallel 8 --schedule-random -E "examples_cpp_arrays-opencl|examples_cpp_for_loops-opencl|examples_cpp_generic_inline_kernel-opencl|examples_cpp_shared_memory-opencl|examples_cpp_for_loops-dpcpp|examples_cpp_arrays-dpcpp|examples_cpp_nonblocking_streams-dpcpp" - name: Run CTests @@ -176,7 +176,7 @@ jobs: run: | source /opt/intel/oneapi/setvars.sh export SYCL_DEVICE_FILTER=opencl.cpu - ctest --test-dir build --progress --output-on-failure --parallel 8 --schedule-random -E "examples_cpp_arrays-opencl|examples_cpp_for_loops-opencl|examples_cpp_generic_inline_kernel-opencl|examples_cpp_shared_memory-opencl|examples_cpp_nonblocking_streams-opencl|examples_cpp_shared_memory-dpcpp|examples_cpp_nonblocking_streams-dpcpp|examples_cpp_for_loops-dpcpp|examples_cpp_arrays-dpcpp" + ctest --test-dir build --progress --output-on-failure --parallel 8 --schedule-random -E "examples_cpp_arrays-opencl|examples_cpp_for_loops-opencl|examples_cpp_generic_inline_kernel-opencl|examples_cpp_shared_memory-opencl|examples_cpp_for_loops-dpcpp|examples_cpp_arrays-dpcpp|examples_cpp_nonblocking_streams-dpcpp" - name: Upload code coverage if: ${{ matrix.OCCA_COVERAGE }} diff --git a/src/occa/internal/lang/expr/dpcppAtomicNode.cpp b/src/occa/internal/lang/expr/dpcppAtomicNode.cpp index b560804c3..849555e3e 100644 --- a/src/occa/internal/lang/expr/dpcppAtomicNode.cpp +++ b/src/occa/internal/lang/expr/dpcppAtomicNode.cpp @@ -32,13 +32,13 @@ namespace occa void dpcppAtomicNode::print(printer &pout) const { - pout << "sycl::ext::oneapi::atomic_ref<"; + pout << "sycl::atomic_ref<"; // Currently CUDA only supports atomics on fundamental types: // assume that we can safefuly ignore the pointer types for now // and simply print the typename. pout << atomic_type.name() << ","; - pout << "sycl::ext::oneapi::memory_order::relaxed,"; + pout << "sycl::memory_order::relaxed,"; // The SYCL standard states, // @@ -49,15 +49,15 @@ namespace occa // Currently OCCA does not address system-wide atomics; // therefore, assume for now that we can always safely // use `memory_scope::device`. - pout << "sycl::ext::oneapi::memory_scope::device,"; + pout << "sycl::memory_scope::device,"; if(atomic_type.hasAttribute("shared")) { - pout << "sycl::access::address_space::global_space"; + pout << "sycl::access::address_space::local_space"; } else { - pout << "sycl::access::address_space::local_space"; + pout << "sycl::access::address_space::global_space"; } pout << ">("; diff --git a/src/occa/internal/lang/modes/dpcpp.cpp b/src/occa/internal/lang/modes/dpcpp.cpp index 4c5268240..09b4b2dbc 100644 --- a/src/occa/internal/lang/modes/dpcpp.cpp +++ b/src/occa/internal/lang/modes/dpcpp.cpp @@ -132,20 +132,18 @@ namespace occa // } } - // @note: As of SYCL 2020 this will need to change to `group_barrier(it.group())` void dpcppParser::addBarriers() { statementArray::from(root) .flatFilterByStatementType(statementType::empty, "barrier") .forEach([&](statement_t *smnt) { - // TODO 1.1: Implement proper barriers emptyStatement &emptySmnt = (emptyStatement &)*smnt; statement_t &barrierSmnt = (*(new sourceCodeStatement( emptySmnt.up, emptySmnt.source, - "item_.barrier(sycl::access::fence_space::local_space);"))); + "group_barrier(item_.get_group());"))); emptySmnt.replaceWith(barrierSmnt); From 7c98d0338f9d59a494e3b421b65b3419744c6f42 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Tue, 16 Aug 2022 15:36:31 -0500 Subject: [PATCH 03/23] Update build.yml --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 51c6aae35..f181d42e2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -165,7 +165,7 @@ jobs: - name: Run CTests if: ${{ matrix.useCMake && !matrix.useoneAPI }} run: | - ctest --test-dir build --progress --output-on-failure --parallel 8 --schedule-random -E "examples_cpp_arrays-opencl|examples_cpp_for_loops-opencl|examples_cpp_generic_inline_kernel-opencl|examples_cpp_shared_memory-opencl|examples_cpp_for_loops-dpcpp|examples_cpp_arrays-dpcpp|examples_cpp_nonblocking_streams-dpcpp" + ctest --test-dir build --progress --output-on-failure --parallel 8 --schedule-random -E "examples_cpp_arrays-opencl|examples_cpp_for_loops-opencl|examples_cpp_generic_inline_kernel-opencl|examples_cpp_shared_memory-opencl|examples_cpp_nonblocking_streams-opencl|examples_cpp_for_loops-dpcpp|examples_cpp_arrays-dpcpp|examples_cpp_nonblocking_streams-dpcpp" - name: Run CTests @@ -176,7 +176,7 @@ jobs: run: | source /opt/intel/oneapi/setvars.sh export SYCL_DEVICE_FILTER=opencl.cpu - ctest --test-dir build --progress --output-on-failure --parallel 8 --schedule-random -E "examples_cpp_arrays-opencl|examples_cpp_for_loops-opencl|examples_cpp_generic_inline_kernel-opencl|examples_cpp_shared_memory-opencl|examples_cpp_for_loops-dpcpp|examples_cpp_arrays-dpcpp|examples_cpp_nonblocking_streams-dpcpp" + ctest --test-dir build --progress --output-on-failure --parallel 8 --schedule-random -E "examples_cpp_arrays-opencl|examples_cpp_for_loops-opencl|examples_cpp_generic_inline_kernel-opencl|examples_cpp_shared_memory-opencl|examples_cpp_nonblocking_streams-opencl|examples_cpp_for_loops-dpcpp|examples_cpp_arrays-dpcpp|examples_cpp_nonblocking_streams-dpcpp" - name: Upload code coverage if: ${{ matrix.OCCA_COVERAGE }} From 5f7e4eae2fc76990509a7d7df312d01e605dc61d Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Tue, 16 Aug 2022 23:12:25 +0000 Subject: [PATCH 04/23] Add Fortran to CI pipeline. --- .github/workflows/build.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f181d42e2..f0215e2f9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -26,8 +26,10 @@ jobs: CC: gcc-9 CXX: g++-9 CXXFLAGS: -Wno-maybe-uninitialized -Wno-cpp + FC: gfortran-9 GCOV: gcov-9 OCCA_COVERAGE: 1 + OCCA_FORTRAN_ENABLED: 1 useCMake: true - name: "[Ubuntu] clang-11" @@ -49,9 +51,10 @@ jobs: CC: icx CXX: icpx CXXFLAGS: -Wno-uninitialized - FC: ifx + FC: ifort GCOV: gcov-9 OCCA_COVERAGE: 1 + OCCA_FORTRAN_ENABLED: 1 useCMake: true useoneAPI: true @@ -60,8 +63,10 @@ jobs: CC: gcc-9 CXX: g++-9 CXXFLAGS: -Wno-maybe-uninitialized + FC: gfortran-9 GCOV: gcov-9 OCCA_COVERAGE: 1 + OCCA_FORTRAN_ENABLED: 1 - name: "[MacOS] clang" os: macos-latest From 0ed7c16998cf7f78eaaf6ed0699eddd75b745149 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Tue, 16 Aug 2022 18:36:24 -0500 Subject: [PATCH 05/23] Update build.yml --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f0215e2f9..96349da0f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -63,10 +63,8 @@ jobs: CC: gcc-9 CXX: g++-9 CXXFLAGS: -Wno-maybe-uninitialized - FC: gfortran-9 GCOV: gcov-9 OCCA_COVERAGE: 1 - OCCA_FORTRAN_ENABLED: 1 - name: "[MacOS] clang" os: macos-latest @@ -121,7 +119,8 @@ jobs: -DCMAKE_BUILD_TYPE="RelWithDebInfo" \ -DCMAKE_INSTALL_PREFIX=install \ -DENABLE_TESTS=ON \ - -DENABLE_EXAMPLES=ON + -DENABLE_EXAMPLES=ON \ + -DENABLE_FORTRAN=ON - name: CMake configure if: ${{ matrix.useCMake && matrix.useoneAPI}} @@ -135,6 +134,7 @@ jobs: -DCMAKE_INSTALL_PREFIX=install \ -DENABLE_TESTS=ON \ -DENABLE_EXAMPLES=ON \ + -DENABLE_FORTRAN=ON \ -DCMAKE_PREFIX_PATH="/opt/intel/oneapi/compiler/latest/linux;/opt/intel/oneapi/compiler/latest/linux/compiler" - name: CMake build From a23bd70f8b3984e40c77841a722e54bf5cc5c9d1 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Tue, 16 Aug 2022 18:44:09 -0500 Subject: [PATCH 06/23] Update build.yml --- .github/workflows/build.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 96349da0f..5ff935c41 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -118,6 +118,9 @@ jobs: cmake -S . -B build \ -DCMAKE_BUILD_TYPE="RelWithDebInfo" \ -DCMAKE_INSTALL_PREFIX=install \ + -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${CXX} \ + -DCMAKE_Fortran_COMPILER=${FC} \ -DENABLE_TESTS=ON \ -DENABLE_EXAMPLES=ON \ -DENABLE_FORTRAN=ON @@ -132,6 +135,9 @@ jobs: cmake -S . -B build \ -DCMAKE_BUILD_TYPE="RelWithDebInfo" \ -DCMAKE_INSTALL_PREFIX=install \ + -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${CXX} \ + -DCMAKE_Fortran_COMPILER=${FC} \ -DENABLE_TESTS=ON \ -DENABLE_EXAMPLES=ON \ -DENABLE_FORTRAN=ON \ From fcc43c559232cb753e18d8326bd3f9e9c095db50 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Tue, 16 Aug 2022 18:55:51 -0500 Subject: [PATCH 07/23] Update build.yml --- .github/workflows/build.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5ff935c41..9889d038a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -106,7 +106,8 @@ jobs: shell: bash run: | sudo apt update - sudo apt install intel-oneapi-compiler-dpcpp-cpp + sudo apt install intel-oneapi-compiler-dpcpp-cpp + sudo apt install intel-oneapi-compiler-fortran - name: Compiler info if: ${{ !matrix.useCMake }} From a9103f0cbc960c904b307267552b80a7bfdf45c5 Mon Sep 17 00:00:00 2001 From: Stefan Frijters Date: Thu, 18 Aug 2022 18:47:55 +0200 Subject: [PATCH 08/23] Show compiler output for kernel if verbose is true (#619) This behaviour was changed for CUDA in the v1.3 release, but getting e.g. information about register spilling is very useful. Also implement similar behaviour for HIP and Metal. --- src/occa/internal/modes/cuda/device.cpp | 2 ++ src/occa/internal/modes/hip/device.cpp | 2 ++ src/occa/internal/modes/metal/device.cpp | 4 ++++ 3 files changed, 8 insertions(+) diff --git a/src/occa/internal/modes/cuda/device.cpp b/src/occa/internal/modes/cuda/device.cpp index d99a67f47..85471ec1a 100644 --- a/src/occa/internal/modes/cuda/device.cpp +++ b/src/occa/internal/modes/cuda/device.cpp @@ -319,6 +319,8 @@ namespace occa { << "Output:\n\n" << commandOutput << "\n" ); + } else if (verbose) { + io::stdout << "Output:\n\n" << commandOutput << "\n"; } //================================ } diff --git a/src/occa/internal/modes/hip/device.cpp b/src/occa/internal/modes/hip/device.cpp index 7b06770ea..5fd9a72d7 100644 --- a/src/occa/internal/modes/hip/device.cpp +++ b/src/occa/internal/modes/hip/device.cpp @@ -310,6 +310,8 @@ namespace occa { << "Output:\n\n" << commandOutput << "\n" ); + } else if (verbose) { + io::stdout << "Output:\n\n" << commandOutput << "\n"; } //================================ } diff --git a/src/occa/internal/modes/metal/device.cpp b/src/occa/internal/modes/metal/device.cpp index 261c5e1b9..bd31e5c7a 100644 --- a/src/occa/internal/modes/metal/device.cpp +++ b/src/occa/internal/modes/metal/device.cpp @@ -176,6 +176,8 @@ namespace occa { << "Output:\n\n" << commandOutput << "\n" ); + } else if (verbose) { + io::stdout << "Output:\n\n" << commandOutput << "\n"; } return true; @@ -212,6 +214,8 @@ namespace occa { << "Output:\n\n" << commandOutput << "\n" ); + } else if (verbose) { + io::stdout << "Output:\n\n" << commandOutput << "\n"; } //================================ } From ad474578d0c5e66da6ae640513ae84aa207aa22b Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Thu, 1 Sep 2022 10:17:43 -0500 Subject: [PATCH 09/23] CMake find OpenCL (#616) * Format OpenCL variable names to match CMake find_package convention. * Updates search procedure for OpenCL headers and libraries. --- cmake/FindOpenCLWrapper.cmake | 53 +++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/cmake/FindOpenCLWrapper.cmake b/cmake/FindOpenCLWrapper.cmake index e536adc76..fa54e2c20 100644 --- a/cmake/FindOpenCLWrapper.cmake +++ b/cmake/FindOpenCLWrapper.cmake @@ -3,31 +3,42 @@ # This Find module is also distributed alongside the occa package config file! ############################################################################### -# Look in some default places for OpenCL and set OPENCL_ROOT if not already set -if(NOT OPENCL_ROOT) - # Search in user specified path first - find_path(OPENCL_ROOT - NAMES CL/cl.h +# Try finding OpenCL. The user should set OpenCL_ROOT if needed. +find_package(OpenCL QUIET) +if(NOT OpenCL_FOUND) + # Otherwise, look for the headers and library in standard locations + find_path(OpenCL_INCLUDE_DIR + NAMES CL/cl.h OpenCL/cl.h PATHS - ENV OPENCL_PATH - DOC "OPENCL root location" - NO_DEFAULT_PATH) + ENV CUDA_PATH + ENV CUDAToolkit_ROOT + ENV ROCM_PATH + ENV NVHPC_ROOT + ENV SYCL_ROOT + /usr/local/cuda + /opt/rocm/opencl + /opt/intel/oneapi/compiler/latest/linux + PATH_SUFFIXES + include + include/sycl + ) - # Now search in default path - find_path(OPENCL_ROOT - NAMES CL/cl.h - PATHS - /usr - /opt/rocm/opencl - /usr/local/cuda + find_library(OpenCL_LIBRARY + NAMES OpenCL libOpenCL + PATHS + ENV CUDA_PATH + ENV CUDAToolkit_ROOT + ENV ROCM_PATH + ENV NVHPC_ROOT + ENV SYCL_ROOT + /usr/local/cuda + /opt/rocm/opencl /opt/intel/oneapi/compiler/latest/linux - PATH_SUFFIXES sycl - DOC "OPENCL root location") + PATH_SUFFIXES + lib + lib64 + ) endif() - -# Trick CMake's default OpenCL module to look in our directory -set(ENV{AMDAPPSDKROOT} ${OPENCL_ROOT}) - find_package(OpenCL) include(FindPackageHandleStandardArgs) From eaef90ecf376121a5c04de9f4e83fead44c05603 Mon Sep 17 00:00:00 2001 From: Stefan Frijters Date: Thu, 8 Sep 2022 18:38:17 +0200 Subject: [PATCH 10/23] Improve compilation output (#620) * Report exit code when compiler invocation fails * Make DPCPP and Metal compileKernel output more consistent with other modes * Check whether popen actually returns a stream --- src/occa/internal/modes/cuda/device.cpp | 2 +- src/occa/internal/modes/dpcpp/device.cpp | 26 ++++++++++++++++-------- src/occa/internal/modes/hip/device.cpp | 2 +- src/occa/internal/modes/metal/device.cpp | 8 ++++---- src/occa/internal/utils/sys.cpp | 11 ++++++++-- 5 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/occa/internal/modes/cuda/device.cpp b/src/occa/internal/modes/cuda/device.cpp index 85471ec1a..ce7e2fc21 100644 --- a/src/occa/internal/modes/cuda/device.cpp +++ b/src/occa/internal/modes/cuda/device.cpp @@ -315,7 +315,7 @@ namespace occa { if (commandExitCode) { OCCA_FORCE_ERROR( "Error compiling [" << kernelName << "]," - " Command: [" << sCommand << ']' + " Command: [" << sCommand << "] exited with code " << commandExitCode << "\n" << "Output:\n\n" << commandOutput << "\n" ); diff --git a/src/occa/internal/modes/dpcpp/device.cpp b/src/occa/internal/modes/dpcpp/device.cpp index 45d2e99ec..de3b00bfb 100644 --- a/src/occa/internal/modes/dpcpp/device.cpp +++ b/src/occa/internal/modes/dpcpp/device.cpp @@ -215,17 +215,25 @@ namespace occa } const std::string &sCommand = command.str(); - if (verbose) - { - io::stdout << sCommand << '\n'; + if (verbose) { + io::stdout << "Compiling [" << kernelName << "]\n" << sCommand << "\n"; } - const int compileError = system(sCommand.c_str()); - - if (compileError) - { - OCCA_FORCE_ERROR("Error compiling [" << kernelName << "]," - << " Command: ["<< sCommand << ']'); + std::string commandOutput; + const int commandExitCode = sys::call( + sCommand.c_str(), + commandOutput + ); + + if (commandExitCode) { + OCCA_FORCE_ERROR( + "Error compiling [" << kernelName << "]," + " Command: [" << sCommand << "] exited with code " << commandExitCode << "\n\n" + << "Output:\n\n" + << commandOutput << "\n" + ); + } else if (verbose) { + io::stdout << "Output:\n\n" << commandOutput << "\n"; } } diff --git a/src/occa/internal/modes/hip/device.cpp b/src/occa/internal/modes/hip/device.cpp index 5fd9a72d7..e2befe66f 100644 --- a/src/occa/internal/modes/hip/device.cpp +++ b/src/occa/internal/modes/hip/device.cpp @@ -306,7 +306,7 @@ namespace occa { if (commandExitCode) { OCCA_FORCE_ERROR( "Error compiling [" << kernelName << "]," - " Command: [" << sCommand << "]\n" + " Command: [" << sCommand << "] exited with code " << commandExitCode << "\n\n" << "Output:\n\n" << commandOutput << "\n" ); diff --git a/src/occa/internal/modes/metal/device.cpp b/src/occa/internal/modes/metal/device.cpp index bd31e5c7a..4f311587a 100644 --- a/src/occa/internal/modes/metal/device.cpp +++ b/src/occa/internal/modes/metal/device.cpp @@ -160,7 +160,7 @@ namespace occa { const std::string airCommand = command.str(); if (verbose) { - io::stdout << "Compiling [" << kernelName << "]\n" << airCommand << "\n"; + io::stdout << "Compiling Air Binary [" << kernelName << "]\n" << airCommand << "\n"; } std::string commandOutput; @@ -172,7 +172,7 @@ namespace occa { if (commandExitCode) { OCCA_FORCE_ERROR( "Error compiling [" << kernelName << "]," - " Command: [" << airCommand << ']' + " Command: [" << airCommand << "] exited with code " << commandExitCode << "\n\n" << "Output:\n\n" << commandOutput << "\n" ); @@ -198,7 +198,7 @@ namespace occa { const std::string metallibCommand = command.str(); if (verbose) { - io::stdout << metallibCommand << '\n'; + io::stdout << "Compiling Metallib [" << kernelName << "]\n" << metallibCommand << "\n"; } std::string commandOutput; @@ -210,7 +210,7 @@ namespace occa { if (commandExitCode) { OCCA_FORCE_ERROR( "Error compiling [" << kernelName << "]," - " Command: [" << metallibCommand << ']' + " Command: [" << metallibCommand << "] exited with code " << commandExitCode << "\n\n" << "Output:\n\n" << commandOutput << "\n" ); diff --git a/src/occa/internal/utils/sys.cpp b/src/occa/internal/utils/sys.cpp index 7147ded9c..ea8442cfe 100644 --- a/src/occa/internal/utils/sys.cpp +++ b/src/occa/internal/utils/sys.cpp @@ -179,9 +179,11 @@ namespace occa { int call(const std::string &cmdline) { #if (OCCA_OS & (OCCA_LINUX_OS | OCCA_MACOS_OS)) FILE *fp = popen(cmdline.c_str(), "r"); + if (!fp) return errno; return pclose(fp); #else FILE *fp = _popen(cmdline.c_str(), "r"); + if (!fp) return errno; return _pclose(fp); #endif } @@ -193,8 +195,13 @@ namespace occa { FILE *fp = _popen(cmdline.c_str(), "r"); #endif - size_t lineBytes = 512; - char lineBuffer[512]; + if (!fp) { + output = "Failed to launch process"; + return errno; + } + + const size_t lineBytes = 512; + char lineBuffer[lineBytes]; output = ""; while (fgets(lineBuffer, lineBytes, fp)) { From 50a8a9b265126d0c1a2c23f73f861f42455f40d8 Mon Sep 17 00:00:00 2001 From: Malachi Date: Wed, 28 Sep 2022 12:34:23 -0500 Subject: [PATCH 11/23] Make compiler_flags setting take precedence over the compiler flag env var (#622) * Make compiler_flags setting take precedence over the compiler flag env var * Apply same change to SYCL backend. --- src/occa/internal/modes/cuda/device.cpp | 6 +++--- src/occa/internal/modes/dpcpp/utils.cpp | 8 ++++---- src/occa/internal/modes/hip/device.cpp | 6 +++--- src/occa/internal/modes/opencl/device.cpp | 6 +++--- src/occa/internal/modes/serial/device.cpp | 6 +++--- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/occa/internal/modes/cuda/device.cpp b/src/occa/internal/modes/cuda/device.cpp index ce7e2fc21..c61b560de 100644 --- a/src/occa/internal/modes/cuda/device.cpp +++ b/src/occa/internal/modes/cuda/device.cpp @@ -47,10 +47,10 @@ namespace occa { compiler = "nvcc"; } - if (env::var("OCCA_CUDA_COMPILER_FLAGS").size()) { - compilerFlags = env::var("OCCA_CUDA_COMPILER_FLAGS"); - } else if (kernelProps.get("compiler_flags").size()) { + if (kernelProps.get("compiler_flags").size()) { compilerFlags = (std::string) kernelProps["compiler_flags"]; + } else if (env::var("OCCA_CUDA_COMPILER_FLAGS").size()) { + compilerFlags = env::var("OCCA_CUDA_COMPILER_FLAGS"); } else { compilerFlags = "-O3"; } diff --git a/src/occa/internal/modes/dpcpp/utils.cpp b/src/occa/internal/modes/dpcpp/utils.cpp index a9ac6fceb..ab2c9a2f4 100644 --- a/src/occa/internal/modes/dpcpp/utils.cpp +++ b/src/occa/internal/modes/dpcpp/utils.cpp @@ -43,13 +43,13 @@ namespace occa void setCompilerFlags(occa::json &dpcpp_properties) noexcept { std::string compiler_flags; - if (env::var("OCCA_DPCPP_COMPILER_FLAGS").size()) + if (dpcpp_properties.has("compiler_flags")) { - compiler_flags = env::var("OCCA_DPCPP_COMPILER_FLAGS"); + compiler_flags = dpcpp_properties["compiler_flags"].toString(); } - else if (dpcpp_properties.has("compiler_flags")) + else if (env::var("OCCA_DPCPP_COMPILER_FLAGS").size()) { - compiler_flags = dpcpp_properties["compiler_flags"].toString(); + compiler_flags = env::var("OCCA_DPCPP_COMPILER_FLAGS"); } dpcpp_properties["compiler_flags"] = compiler_flags; } diff --git a/src/occa/internal/modes/hip/device.cpp b/src/occa/internal/modes/hip/device.cpp index e2befe66f..0b7453fae 100644 --- a/src/occa/internal/modes/hip/device.cpp +++ b/src/occa/internal/modes/hip/device.cpp @@ -52,10 +52,10 @@ namespace occa { compiler = "hipcc"; } - if (env::var("OCCA_HIP_COMPILER_FLAGS").size()) { - compilerFlags = env::var("OCCA_HIP_COMPILER_FLAGS"); - } else if (kernelProps.get("compiler_flags").size()) { + if (kernelProps.get("compiler_flags").size()) { compilerFlags = (std::string) kernelProps["compiler_flags"]; + } else if (env::var("OCCA_HIP_COMPILER_FLAGS").size()) { + compilerFlags = env::var("OCCA_HIP_COMPILER_FLAGS"); } else { compilerFlags = "-O3"; } diff --git a/src/occa/internal/modes/opencl/device.cpp b/src/occa/internal/modes/opencl/device.cpp index 16be70241..c5d090751 100644 --- a/src/occa/internal/modes/opencl/device.cpp +++ b/src/occa/internal/modes/opencl/device.cpp @@ -42,10 +42,10 @@ namespace occa { std::string compilerFlags; // Use "-cl-opt-disable" for debug-mode - if (env::var("OCCA_OPENCL_COMPILER_FLAGS").size()) { - compilerFlags = env::var("OCCA_OPENCL_COMPILER_FLAGS"); - } else if (kernelProps.has("compiler_flags")) { + if (kernelProps.has("compiler_flags")) { compilerFlags = (std::string) kernelProps["compiler_flags"]; + } else if (env::var("OCCA_OPENCL_COMPILER_FLAGS").size()) { + compilerFlags = env::var("OCCA_OPENCL_COMPILER_FLAGS"); } std::string ocl_c_ver = "2.0"; diff --git a/src/occa/internal/modes/serial/device.cpp b/src/occa/internal/modes/serial/device.cpp index ce3df6586..4918265ca 100644 --- a/src/occa/internal/modes/serial/device.cpp +++ b/src/occa/internal/modes/serial/device.cpp @@ -193,12 +193,12 @@ namespace occa { #endif } - if (compilerLanguageFlag == sys::language::CPP && env::var("OCCA_CXXFLAGS").size()) { + if (kernelProps.get("compiler_flags").size()) { + compilerFlags = (std::string) kernelProps["compiler_flags"]; + } else if (compilerLanguageFlag == sys::language::CPP && env::var("OCCA_CXXFLAGS").size()) { compilerFlags = env::var("OCCA_CXXFLAGS"); } else if (compilerLanguageFlag == sys::language::C && env::var("OCCA_CFLAGS").size()) { compilerFlags = env::var("OCCA_CFLAGS"); - } else if (kernelProps.get("compiler_flags").size()) { - compilerFlags = (std::string) kernelProps["compiler_flags"]; } else if (compilerLanguageFlag == sys::language::CPP && env::var("CXXFLAGS").size()) { compilerFlags = env::var("CXXFLAGS"); } else if (compilerLanguageFlag == sys::language::C && env::var("CFLAGS").size()) { From f3373b7ef38eedce625f4f16a167028cbd1eb462 Mon Sep 17 00:00:00 2001 From: Noel Chalmers Date: Mon, 3 Oct 2022 11:06:35 -0500 Subject: [PATCH 12/23] [CUDA][HIP][DPC++] Fix an issue with double frees when OKL has multiple kernels (#624) --- src/occa/internal/modes/cuda/device.cpp | 2 +- src/occa/internal/modes/cuda/kernel.cpp | 12 +++++++++++- src/occa/internal/modes/cuda/kernel.hpp | 7 +++++++ src/occa/internal/modes/dpcpp/device.cpp | 6 +++--- src/occa/internal/modes/dpcpp/kernel.cpp | 14 +++++++++++++- src/occa/internal/modes/dpcpp/kernel.hpp | 7 +++++++ src/occa/internal/modes/hip/device.cpp | 2 +- src/occa/internal/modes/hip/kernel.cpp | 12 +++++++++++- src/occa/internal/modes/hip/kernel.hpp | 7 +++++++ 9 files changed, 61 insertions(+), 8 deletions(-) diff --git a/src/occa/internal/modes/cuda/device.cpp b/src/occa/internal/modes/cuda/device.cpp index c61b560de..a1bd1684e 100644 --- a/src/occa/internal/modes/cuda/device.cpp +++ b/src/occa/internal/modes/cuda/device.cpp @@ -348,6 +348,7 @@ namespace occa { kernel &k = *(new kernel(this, kernelName, sourceFilename, + cuModule, kernelProps)); k.launcherKernel = buildLauncherKernel(kernelHash, @@ -377,7 +378,6 @@ namespace occa { kernel *cuKernel = new kernel(this, metadata.name, sourceFilename, - cuModule, cuFunction, kernelProps); cuKernel->metadata = metadata; diff --git a/src/occa/internal/modes/cuda/kernel.cpp b/src/occa/internal/modes/cuda/kernel.cpp index 568e7d47b..292a3cbf7 100644 --- a/src/occa/internal/modes/cuda/kernel.cpp +++ b/src/occa/internal/modes/cuda/kernel.cpp @@ -10,11 +10,21 @@ namespace occa { kernel::kernel(modeDevice_t *modeDevice_, const std::string &name_, const std::string &sourceFilename_, + CUmodule cuModule_, const occa::json &properties_) : occa::launchedModeKernel_t(modeDevice_, name_, sourceFilename_, properties_), - cuModule(NULL), + cuModule(cuModule_), cuFunction(NULL) {} + kernel::kernel(modeDevice_t *modeDevice_, + const std::string &name_, + const std::string &sourceFilename_, + CUfunction cuFunction_, + const occa::json &properties_) : + occa::launchedModeKernel_t(modeDevice_, name_, sourceFilename_, properties_), + cuModule(NULL), + cuFunction(cuFunction_) {} + kernel::kernel(modeDevice_t *modeDevice_, const std::string &name_, const std::string &sourceFilename_, diff --git a/src/occa/internal/modes/cuda/kernel.hpp b/src/occa/internal/modes/cuda/kernel.hpp index 0b4cb68f6..f7bb9c8a5 100644 --- a/src/occa/internal/modes/cuda/kernel.hpp +++ b/src/occa/internal/modes/cuda/kernel.hpp @@ -23,6 +23,13 @@ namespace occa { kernel(modeDevice_t *modeDevice_, const std::string &name_, const std::string &sourceFilename_, + CUmodule cuModule_, + const occa::json &properties_); + + kernel(modeDevice_t *modeDevice_, + const std::string &name_, + const std::string &sourceFilename_, + CUfunction cuFunction_, const occa::json &properties_); kernel(modeDevice_t *modeDevice_, diff --git a/src/occa/internal/modes/dpcpp/device.cpp b/src/occa/internal/modes/dpcpp/device.cpp index de3b00bfb..d2a1a1377 100644 --- a/src/occa/internal/modes/dpcpp/device.cpp +++ b/src/occa/internal/modes/dpcpp/device.cpp @@ -246,9 +246,12 @@ namespace occa lang::sourceMetadata_t &deviceMetadata, const occa::json &kernelProps) { + void *dl_handle = sys::dlopen(binaryFilename); + dpcpp::kernel &k = *(new dpcpp::kernel(this, kernelName, sourceFilename, + dl_handle, kernelProps)); k.launcherKernel = buildLauncherKernel(kernelHash, @@ -260,8 +263,6 @@ namespace occa kernelName, deviceMetadata); - void *dl_handle = sys::dlopen(binaryFilename); - const int launchedKernelsCount = (int)launchedKernelsMetadata.size(); for (int i = 0; i < launchedKernelsCount; ++i) { @@ -279,7 +280,6 @@ namespace occa kernel *dpcppKernel = new dpcpp::kernel(this, metadata.name, sourceFilename, - dl_handle, kernel_function, kernelProps); diff --git a/src/occa/internal/modes/dpcpp/kernel.cpp b/src/occa/internal/modes/dpcpp/kernel.cpp index 735d0c6a2..8f0c357ee 100644 --- a/src/occa/internal/modes/dpcpp/kernel.cpp +++ b/src/occa/internal/modes/dpcpp/kernel.cpp @@ -14,13 +14,25 @@ namespace occa kernel::kernel(modeDevice_t *modeDevice_, const std::string &name_, const std::string &sourceFilename_, + void *dlHandle_, const occa::json &properties_) : occa::launchedModeKernel_t(modeDevice_, name_, sourceFilename_, properties_), - dlHandle{nullptr}, + dlHandle{dlHandle_}, function{nullptr} { } + kernel::kernel(modeDevice_t *modeDevice_, + const std::string &name_, + const std::string &sourceFilename_, + functionPtr_t function_, + const occa::json &properties_) + : occa::launchedModeKernel_t(modeDevice_, name_, sourceFilename_, properties_), + dlHandle(nullptr), + function(function_) + { + } + kernel::kernel(modeDevice_t *modeDevice_, const std::string &name_, const std::string &sourceFilename_, diff --git a/src/occa/internal/modes/dpcpp/kernel.hpp b/src/occa/internal/modes/dpcpp/kernel.hpp index c2b41a65d..26d208300 100644 --- a/src/occa/internal/modes/dpcpp/kernel.hpp +++ b/src/occa/internal/modes/dpcpp/kernel.hpp @@ -24,6 +24,13 @@ namespace occa kernel(modeDevice_t *modeDevice_, const std::string &name_, const std::string &sourceFilename_, + void* dlHandle_, + const occa::json &properties_); + + kernel(modeDevice_t *modeDevice_, + const std::string &name_, + const std::string &sourceFilename_, + functionPtr_t function_, const occa::json &properties_); kernel(modeDevice_t *modeDevice_, diff --git a/src/occa/internal/modes/hip/device.cpp b/src/occa/internal/modes/hip/device.cpp index 0b7453fae..f8f4977d0 100644 --- a/src/occa/internal/modes/hip/device.cpp +++ b/src/occa/internal/modes/hip/device.cpp @@ -333,6 +333,7 @@ namespace occa { kernel &k = *(new kernel(this, kernelName, sourceFilename, + hipModule, kernelProps)); k.launcherKernel = buildLauncherKernel(kernelHash, @@ -360,7 +361,6 @@ namespace occa { kernel *hipKernel = new kernel(this, metadata.name, sourceFilename, - hipModule, hipFunction, kernelProps); hipKernel->metadata = metadata; diff --git a/src/occa/internal/modes/hip/kernel.cpp b/src/occa/internal/modes/hip/kernel.cpp index 3e2d6ba69..b381a3438 100644 --- a/src/occa/internal/modes/hip/kernel.cpp +++ b/src/occa/internal/modes/hip/kernel.cpp @@ -10,11 +10,21 @@ namespace occa { kernel::kernel(modeDevice_t *modeDevice_, const std::string &name_, const std::string &sourceFilename_, + hipModule_t hipModule_, const occa::json &properties_) : occa::launchedModeKernel_t(modeDevice_, name_, sourceFilename_, properties_), - hipModule(NULL), + hipModule(hipModule_), hipFunction(NULL) {} + kernel::kernel(modeDevice_t *modeDevice_, + const std::string &name_, + const std::string &sourceFilename_, + hipFunction_t hipFunction_, + const occa::json &properties_) : + occa::launchedModeKernel_t(modeDevice_, name_, sourceFilename_, properties_), + hipModule(NULL), + hipFunction(hipFunction_) {} + kernel::kernel(modeDevice_t *modeDevice_, const std::string &name_, const std::string &sourceFilename_, diff --git a/src/occa/internal/modes/hip/kernel.hpp b/src/occa/internal/modes/hip/kernel.hpp index d0863894a..967513734 100644 --- a/src/occa/internal/modes/hip/kernel.hpp +++ b/src/occa/internal/modes/hip/kernel.hpp @@ -21,6 +21,13 @@ namespace occa { kernel(modeDevice_t *modeDevice_, const std::string &name_, const std::string &sourceFilename_, + hipModule_t hipModule_, + const occa::json &properties_); + + kernel(modeDevice_t *modeDevice_, + const std::string &name_, + const std::string &sourceFilename_, + hipFunction_t hipFunction_, const occa::json &properties_); kernel(modeDevice_t *modeDevice_, From f5e49726a42df2648f1eea6186da6ab159de19a1 Mon Sep 17 00:00:00 2001 From: Noel Chalmers Date: Mon, 3 Oct 2022 11:12:24 -0500 Subject: [PATCH 13/23] [Serial] Add dynamic exclusive variable sizes (#625) --- src/occa/internal/lang/modes/serial.cpp | 123 ++++++++++++++++++++++-- src/occa/internal/lang/modes/serial.hpp | 7 +- 2 files changed, 123 insertions(+), 7 deletions(-) diff --git a/src/occa/internal/lang/modes/serial.cpp b/src/occa/internal/lang/modes/serial.cpp index 83cc43073..c7d044f9a 100644 --- a/src/occa/internal/lang/modes/serial.cpp +++ b/src/occa/internal/lang/modes/serial.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -133,7 +134,7 @@ namespace occa { (smnt->type() & statementType::declaration) && ((declarationStatement*) smnt)->declaresVariable(var) ) { - defineExclusiveVariableAsArray(var); + defineExclusiveVariableAsArray((declarationStatement&) *smnt, var); return &varNode; } @@ -264,12 +265,122 @@ namespace occa { } } - void serialParser::defineExclusiveVariableAsArray(variable_t &var) { - // TODO: Dynamic array sizes - // Define the variable as a stack array + int serialParser::getInnerLoopLevel(forStatement &forSmnt) { + statement_t *smnt = forSmnt.up; + int level = 0; + while (smnt) { + if ((smnt->type() & statementType::for_) + && smnt->hasAttribute("inner")) { + ++level; + } + smnt = smnt->up; + } + return level; + } + + forStatement* serialParser::getInnerMostInnerLoop(forStatement &forSmnt) { + int maxLevel = -1; + forStatement *innerMostInnerLoop = NULL; + + statementArray::from(forSmnt) + .flatFilterByAttribute("inner") + .filterByStatementType(statementType::for_) + .forEach([&](statement_t *smnt) { + forStatement &innerSmnt = (forStatement&) *smnt; + const int level = getInnerLoopLevel(innerSmnt); + if (level > maxLevel) { + maxLevel = level; + innerMostInnerLoop = &innerSmnt; + } + }); + + return innerMostInnerLoop; + } + + void serialParser::defineExclusiveVariableAsArray(declarationStatement &declSmnt, + variable_t &var) { + // Find outer-most outer loop + statement_t *smnt = declSmnt.up; + forStatement *outerMostOuterLoop = NULL; + while (smnt) { + if (smnt->hasAttribute("outer")) { + outerMostOuterLoop = (forStatement*) smnt; + } + smnt = smnt->up; + } + + // Check if outer loop has max_inner_dims set + bool maxInnerDimsKnown{false}; + int maxInnerDims[3] = {1,1,1}; + if (outerMostOuterLoop->hasAttribute("max_inner_dims")) { + maxInnerDimsKnown = true; + attributeToken_t& attr = outerMostOuterLoop->attributes["max_inner_dims"]; + + for(size_t i=0; i < attr.args.size(); ++i) { + exprNode* expr = attr.args[i].expr; + primitive value = expr->evaluate(); + maxInnerDims[i] = value; + } + } + + //Check if inner dimensions are known at compile time + bool innerDimsKnown{true}; + int knownInnerDims[3] = {1,1,1}; + forStatement *innerSmnt = getInnerMostInnerLoop(*outerMostOuterLoop); + statementArray path = oklForStatement::getOklLoopPath(*innerSmnt); + + int innerIndex; + const int pathCount = (int) path.length(); + for (int i = 0; i < pathCount; ++i) { + forStatement &pathSmnt = *((forStatement*) path[i]); + oklForStatement oklForSmnt(pathSmnt); + + if(pathSmnt.hasAttribute("inner")) { + innerIndex = oklForSmnt.oklLoopIndex(); + if(oklForSmnt.getIterationCount()->canEvaluate()) { + knownInnerDims[innerIndex] = (int) oklForSmnt.getIterationCount()->evaluate(); + } else { + std::string s = oklForSmnt.getIterationCount()->toString(); + if(s.find("_occa_tiled_") != std::string::npos) { + size_t tile_size = s.find_first_of("123456789"); + OCCA_ERROR("@tile size is undefined!",tile_size != std::string::npos); + knownInnerDims[innerIndex] = std::stoi(s.substr(tile_size)); + } else { + //loop bounds are unknown at compile time + innerDimsKnown=false; + break; + } + } + } + } + const int knownInnerDim = knownInnerDims[0] + * knownInnerDims[1] + * knownInnerDims[2]; + const int maxInnerDim = maxInnerDims[0] + * maxInnerDims[1] + * maxInnerDims[2]; + + if (innerDimsKnown & maxInnerDimsKnown) { + if (knownInnerDim > maxInnerDim) { + outerMostOuterLoop->printError("[@inner] loop dimensions larger then allowed by [@max_inner_dims]"); + success=false; + return; + } + } + + // Determine how long the exclusive array should be + int exclusiveArraySize = 1024; + if (maxInnerDimsKnown) { + exclusiveArraySize = maxInnerDim; + } + if (innerDimsKnown) { + exclusiveArraySize = knownInnerDim; + } + + // Make exclusive variable declaration into an array // For example: // const int x - // -> const int x[256] + // -> const int x[1024] operatorToken startToken(var.source->origin, op::bracketStart); operatorToken endToken(var.source->origin, @@ -280,7 +391,7 @@ namespace occa { array_t(startToken, endToken, new primitiveNode(var.source, - 256)) + exclusiveArraySize)) ); } diff --git a/src/occa/internal/lang/modes/serial.hpp b/src/occa/internal/lang/modes/serial.hpp index ea24fe93c..29bc019f3 100644 --- a/src/occa/internal/lang/modes/serial.hpp +++ b/src/occa/internal/lang/modes/serial.hpp @@ -26,7 +26,12 @@ namespace occa { void setupExclusiveDeclaration(declarationStatement &declSmnt); void setupExclusiveIndices(); - void defineExclusiveVariableAsArray(variable_t &var); + int getInnerLoopLevel(forStatement &forSmnt); + + forStatement* getInnerMostInnerLoop(forStatement &forSmnt); + + void defineExclusiveVariableAsArray(declarationStatement &declSmnt, + variable_t &var); exprNode* addExclusiveVariableArrayAccessor(statement_t &smnt, exprNode &expr, From d7d090952c6d09db04616249ca9cec241292cc22 Mon Sep 17 00:00:00 2001 From: Noel Chalmers Date: Mon, 17 Oct 2022 13:30:14 -0500 Subject: [PATCH 14/23] Fix warnings with gcc-11 (#627) * [CUDA][HIP][OpenCL] Fix some warnings with gcc11 * [HIP]{OpenCL] Fix a couple more warnings --- src/occa/internal/modes/cuda/device.cpp | 10 +++++----- src/occa/internal/modes/cuda/kernel.cpp | 2 +- src/occa/internal/modes/hip/device.cpp | 2 +- src/occa/internal/modes/hip/registration.cpp | 4 +++- src/occa/internal/modes/opencl/device.cpp | 2 +- src/occa/internal/modes/opencl/kernel.cpp | 4 ++-- src/occa/internal/modes/opencl/streamTag.cpp | 4 ++-- src/occa/internal/modes/opencl/utils.cpp | 16 ++++++++-------- 8 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/occa/internal/modes/cuda/device.cpp b/src/occa/internal/modes/cuda/device.cpp index a1bd1684e..859c03652 100644 --- a/src/occa/internal/modes/cuda/device.cpp +++ b/src/occa/internal/modes/cuda/device.cpp @@ -183,7 +183,7 @@ namespace occa { waitFor(endTag); - float msTimeTaken; + float msTimeTaken = 0.0; OCCA_CUDA_ERROR("Device: Timing Between Tags", cuEventElapsedTime(&msTimeTaken, cuStartTag->cuEvent, @@ -228,8 +228,8 @@ namespace occa { } // Regular CUDA Kernel - CUmodule cuModule; - CUfunction cuFunction; + CUmodule cuModule = NULL; + CUfunction cuFunction = NULL; CUresult error; setCudaContext(); @@ -333,7 +333,7 @@ namespace occa { lang::sourceMetadata_t &launcherMetadata, lang::sourceMetadata_t &deviceMetadata, const occa::json &kernelProps) { - CUmodule cuModule; + CUmodule cuModule = NULL; CUresult error; setCudaContext(); @@ -366,7 +366,7 @@ namespace occa { for (int i = 0; i < launchedKernelsCount; ++i) { lang::kernelMetadata_t &metadata = launchedKernelsMetadata[i]; - CUfunction cuFunction; + CUfunction cuFunction = NULL; error = cuModuleGetFunction(&cuFunction, cuModule, metadata.name.c_str()); diff --git a/src/occa/internal/modes/cuda/kernel.cpp b/src/occa/internal/modes/cuda/kernel.cpp index 292a3cbf7..b90a7b454 100644 --- a/src/occa/internal/modes/cuda/kernel.cpp +++ b/src/occa/internal/modes/cuda/kernel.cpp @@ -60,7 +60,7 @@ namespace occa { dim kernel::maxInnerDims() const { static dim maxInnerDims_(0); if (maxInnerDims_.x == 0) { - int maxSize; + int maxSize = 0; OCCA_CUDA_ERROR("Kernel: Getting Maximum Inner-Dim Size", cuFuncGetAttribute(&maxSize, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, diff --git a/src/occa/internal/modes/hip/device.cpp b/src/occa/internal/modes/hip/device.cpp index f8f4977d0..1310e64ec 100644 --- a/src/occa/internal/modes/hip/device.cpp +++ b/src/occa/internal/modes/hip/device.cpp @@ -166,7 +166,7 @@ namespace occa { waitFor(endTag); - float msTimeTaken = 0; + float msTimeTaken = 0.0; OCCA_HIP_ERROR("Device: Timing Between Tags", hipEventElapsedTime(&msTimeTaken, hipStartTag->hipEvent, diff --git a/src/occa/internal/modes/hip/registration.cpp b/src/occa/internal/modes/hip/registration.cpp index 740ab7423..bb44acc37 100644 --- a/src/occa/internal/modes/hip/registration.cpp +++ b/src/occa/internal/modes/hip/registration.cpp @@ -25,7 +25,9 @@ namespace occa { hipDeviceProp_t props; OCCA_HIP_ERROR("Getting device properties", hipGetDeviceProperties(&props, deviceId)); - strcpy(deviceName, props.name); + if (props.name != NULL) { + strcpy(deviceName, props.name); + } const udim_t bytes = props.totalGlobalMem; const std::string bytesStr = stringifyBytes(bytes); diff --git a/src/occa/internal/modes/opencl/device.cpp b/src/occa/internal/modes/opencl/device.cpp index c5d090751..bf1e463f5 100644 --- a/src/occa/internal/modes/opencl/device.cpp +++ b/src/occa/internal/modes/opencl/device.cpp @@ -123,7 +123,7 @@ namespace occa { } occa::streamTag device::tagStream() { - cl_event clEvent; + cl_event clEvent = NULL; #ifdef CL_VERSION_1_2 OCCA_OPENCL_ERROR("Device: Tagging Stream", diff --git a/src/occa/internal/modes/opencl/kernel.cpp b/src/occa/internal/modes/opencl/kernel.cpp index 29607f33a..7c0c7d9d0 100644 --- a/src/occa/internal/modes/opencl/kernel.cpp +++ b/src/occa/internal/modes/opencl/kernel.cpp @@ -82,8 +82,8 @@ namespace occa { // TODO 1.1: This should be in the device, not the kernel static occa::dim maxInnerDims_(0); if (maxInnerDims_.x == 0) { - size_t dims_; - size_t bytes; + size_t dims_ = 0; + size_t bytes = 0; OCCA_OPENCL_ERROR("Kernel: Max Inner Dims", clGetKernelWorkGroupInfo(clKernel, clDevice, diff --git a/src/occa/internal/modes/opencl/streamTag.cpp b/src/occa/internal/modes/opencl/streamTag.cpp index fced2cc5d..56c54d009 100644 --- a/src/occa/internal/modes/opencl/streamTag.cpp +++ b/src/occa/internal/modes/opencl/streamTag.cpp @@ -17,7 +17,7 @@ namespace occa { double streamTag::startTime() { if (start_time < 0) { - cl_ulong clTime; + cl_ulong clTime = 0; OCCA_OPENCL_ERROR("streamTag: Getting event profiling info", clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_START, @@ -31,7 +31,7 @@ namespace occa { double streamTag::endTime() { if (end_time < 0) { - cl_ulong clTime; + cl_ulong clTime = 0; OCCA_OPENCL_ERROR("streamTag: Getting event profiling info", clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_END, diff --git a/src/occa/internal/modes/opencl/utils.cpp b/src/occa/internal/modes/opencl/utils.cpp index 9dd9d1420..8fd4e650b 100644 --- a/src/occa/internal/modes/opencl/utils.cpp +++ b/src/occa/internal/modes/opencl/utils.cpp @@ -32,7 +32,7 @@ namespace occa { } int getPlatformCount() { - cl_uint platformCount; + cl_uint platformCount = 0; OCCA_OPENCL_ERROR("OpenCL: Get Platform ID Count", clGetPlatformIDs(0, NULL, &platformCount)); @@ -55,7 +55,7 @@ namespace occa { std::string platformStrInfo(cl_platform_id clPID, cl_platform_info clInfo) { - size_t bytes; + size_t bytes = 0; OCCA_OPENCL_ERROR("OpenCL: Getting Platform String Info", clGetPlatformInfo(clPID, @@ -123,7 +123,7 @@ namespace occa { } cl_device_type deviceType(info::device_type type) { - cl_device_type dtype; + cl_device_type dtype = CL_DEVICE_TYPE_ALL; switch (type) { case info::device_type::cpu: dtype = CL_DEVICE_TYPE_CPU; @@ -179,7 +179,7 @@ namespace occa { std::string deviceStrInfo(cl_device_id clDID, cl_device_info clInfo) { - size_t bytes; + size_t bytes = 0; OCCA_OPENCL_ERROR("OpenCL: Getting Device String Info", clGetDeviceInfo(clDID, @@ -238,7 +238,7 @@ namespace occa { info::device_type deviceType(int pID, int dID) { cl_device_id clDID = deviceID(pID, dID); - cl_device_type clDeviceType; + cl_device_type clDeviceType = CL_DEVICE_TYPE_ALL; OCCA_OPENCL_ERROR( "OpenCL: Get Device Type", @@ -267,7 +267,7 @@ namespace occa { int deviceCoreCount(int pID, int dID) { cl_device_id clDID = deviceID(pID, dID); - cl_uint ret; + cl_uint ret = 0; OCCA_OPENCL_ERROR("OpenCL: Get Device Core Count", clGetDeviceInfo(clDID, @@ -278,7 +278,7 @@ namespace occa { } udim_t deviceGlobalMemSize(cl_device_id dID) { - cl_ulong ret; + cl_ulong ret = 0; OCCA_OPENCL_ERROR("OpenCL: Get Device Available Memory", clGetDeviceInfo(dID, @@ -416,7 +416,7 @@ namespace occa { cl_int error = 1; cl_int binaryError = 1; - size_t binaryBytes; + size_t binaryBytes = 0; OCCA_OPENCL_ERROR( "saveProgramBinary: Getting Binary Sizes", clGetProgramInfo(info.clProgram, From b14b6cc84aa209a86d4d640177af133a6cfd13b0 Mon Sep 17 00:00:00 2001 From: Noel Chalmers Date: Mon, 17 Oct 2022 13:31:15 -0500 Subject: [PATCH 15/23] [OpenMP] Fix bug with cancelling hash (#626) --- src/occa/internal/modes/openmp/device.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/occa/internal/modes/openmp/device.cpp b/src/occa/internal/modes/openmp/device.cpp index 71f3a8473..eb354a205 100644 --- a/src/occa/internal/modes/openmp/device.cpp +++ b/src/occa/internal/modes/openmp/device.cpp @@ -14,14 +14,14 @@ namespace occa { hash_t device::hash() const { return ( serial::device::hash() - ^ occa::hash("openmp") + ^ occa::hash("openmp device::hash") ); } hash_t device::kernelHash(const occa::json &props) const { return ( serial::device::kernelHash(props) - ^ occa::hash("openmp") + ^ occa::hash("openmp device::kernelHash") ); } From a3ff2ed39fe077780094b255345cfba4ef8e57e3 Mon Sep 17 00:00:00 2001 From: Noel Chalmers Date: Thu, 27 Oct 2022 16:44:03 -0500 Subject: [PATCH 16/23] [API][Core] Memory Pools (#543) * API hooks for memoryPool * [MemPool] Internal hooks for memoryPool * [MemPool][CUDA] CUDA memoryPool * [MemPool][Serial] Serial Memory pool * [MemPool][HIP] HIP Memory Pool * [MemPool][SYCL] SYCL Memory Pool * [MemPool][OpenCL] OpenCL Memory Pool * [MemPool][Metal] Metal Memory Pool * [MemPool][Tests] Adding test for memoryPool * [Core][MemPool] Fix compiler warnings * [Buffer][MemoryPool] Fixing incorrect behavior of manual freeing of occa::memory * [HIP] Address some compiler warnings * [MemoryPool] Add missing override * [MemPool] Put memoryPool object in experimental namespace * [MemPool] Add a ShrinkToFit API * [MemPool] Fix an issue with releasing reservation * [Examples][Mempool] Add an example of memory pool usage * [OpenCL] Fix an uninitialized varible warning * [CI] Remove parallel ctest args * [CI] Remove memory pool example from OpenCL test * [MemPool] Move some mempool routines to a more centralized place * [Examples] Change mempool example to have stronger error checking * [Metal] Fix some memcpy offsets * [MemPool] Add alignment options to mempool * [MemPool] Update memPool example * [MemPool] Fix warning * [MemPool] Fix some more warnings * [MemPool] More warnings * [MemPool] Fixes for MemPool unit test * [MemPool] Fix a signed-unsigned comparison * [Core] Fix up when/where modeMemory and modeBuffers are deleted * [Core][C][MemPool] Add C API for MemoryPools * [Core][C][MemPool] Add src files for C API for MemoryPools * [Tests][C][MemPool] Adding test for MemoryPool C API * [Examples] Skip other modes for memory pool example * [Buffer] Fix access rights to members of buffer classes * [Buffer] Undo buffer::malloc needing a const argument --- .github/workflows/build.yml | 1 + examples/cpp/17_memory_pool/CMakeLists.txt | 4 + examples/cpp/17_memory_pool/Makefile | 30 ++ examples/cpp/17_memory_pool/README.md | 28 ++ examples/cpp/17_memory_pool/addVectors.okl | 8 + examples/cpp/17_memory_pool/main.cpp | 286 +++++++++++++++ examples/cpp/CMakeLists.txt | 1 + include/occa/c/base.h | 4 + include/occa/c/device.h | 5 + include/occa/c/experimental.h | 1 + include/occa/c/experimental/memoryPool.h | 42 +++ include/occa/c/types.h | 2 + include/occa/core/base.hpp | 2 + include/occa/core/device.hpp | 16 + include/occa/core/memoryPool.hpp | 261 +++++++++++++ include/occa/core/memoryPool.tpp | 8 + src/c/base.cpp | 14 + src/c/device.cpp | 17 + src/c/experimental/memoryPool.cpp | 74 ++++ src/core/base.cpp | 4 + src/core/device.cpp | 11 + src/core/memory.cpp | 5 +- src/core/memoryPool.cpp | 182 +++++++++ src/occa/internal/c/types.cpp | 44 +++ src/occa/internal/c/types.hpp | 13 +- src/occa/internal/core/buffer.cpp | 4 - src/occa/internal/core/buffer.hpp | 13 +- src/occa/internal/core/device.hpp | 2 + src/occa/internal/core/memory.cpp | 27 +- src/occa/internal/core/memory.hpp | 1 - src/occa/internal/core/memoryPool.cpp | 347 ++++++++++++++++++ src/occa/internal/core/memoryPool.hpp | 60 +++ src/occa/internal/modes/cuda/buffer.hpp | 14 +- src/occa/internal/modes/cuda/device.cpp | 5 + src/occa/internal/modes/cuda/device.hpp | 2 + src/occa/internal/modes/cuda/memory.cpp | 19 +- src/occa/internal/modes/cuda/memory.hpp | 6 +- src/occa/internal/modes/cuda/memoryPool.cpp | 61 +++ src/occa/internal/modes/cuda/memoryPool.hpp | 31 ++ src/occa/internal/modes/dpcpp/buffer.hpp | 6 +- src/occa/internal/modes/dpcpp/device.cpp | 5 + src/occa/internal/modes/dpcpp/device.hpp | 2 + src/occa/internal/modes/dpcpp/memory.cpp | 12 +- src/occa/internal/modes/dpcpp/memory.hpp | 6 +- src/occa/internal/modes/dpcpp/memoryPool.cpp | 40 ++ src/occa/internal/modes/dpcpp/memoryPool.hpp | 29 ++ src/occa/internal/modes/hip/buffer.cpp | 2 +- src/occa/internal/modes/hip/buffer.hpp | 14 +- src/occa/internal/modes/hip/device.cpp | 5 + src/occa/internal/modes/hip/device.hpp | 2 + src/occa/internal/modes/hip/memory.cpp | 17 +- src/occa/internal/modes/hip/memory.hpp | 6 +- src/occa/internal/modes/hip/memoryPool.cpp | 64 ++++ src/occa/internal/modes/hip/memoryPool.hpp | 31 ++ src/occa/internal/modes/metal/buffer.hpp | 20 +- src/occa/internal/modes/metal/device.cpp | 5 + src/occa/internal/modes/metal/device.hpp | 2 + src/occa/internal/modes/metal/memory.cpp | 23 +- src/occa/internal/modes/metal/memory.hpp | 8 +- src/occa/internal/modes/metal/memoryPool.cpp | 52 +++ src/occa/internal/modes/metal/memoryPool.hpp | 29 ++ src/occa/internal/modes/opencl/buffer.hpp | 19 +- src/occa/internal/modes/opencl/device.cpp | 5 + src/occa/internal/modes/opencl/device.hpp | 2 + src/occa/internal/modes/opencl/memory.cpp | 33 +- src/occa/internal/modes/opencl/memory.hpp | 8 +- src/occa/internal/modes/opencl/memoryPool.cpp | 70 ++++ src/occa/internal/modes/opencl/memoryPool.hpp | 31 ++ src/occa/internal/modes/opencl/polyfill.hpp | 1 + src/occa/internal/modes/opencl/utils.cpp | 2 +- src/occa/internal/modes/serial/buffer.hpp | 6 +- src/occa/internal/modes/serial/device.cpp | 5 + src/occa/internal/modes/serial/device.hpp | 2 + src/occa/internal/modes/serial/memory.cpp | 11 +- src/occa/internal/modes/serial/memory.hpp | 6 +- src/occa/internal/modes/serial/memoryPool.cpp | 37 ++ src/occa/internal/modes/serial/memoryPool.hpp | 29 ++ tests/src/c/memoryPool.cpp | 232 ++++++++++++ tests/src/core/memoryPool.cpp | 167 +++++++++ 79 files changed, 2598 insertions(+), 103 deletions(-) create mode 100644 examples/cpp/17_memory_pool/CMakeLists.txt create mode 100644 examples/cpp/17_memory_pool/Makefile create mode 100644 examples/cpp/17_memory_pool/README.md create mode 100644 examples/cpp/17_memory_pool/addVectors.okl create mode 100644 examples/cpp/17_memory_pool/main.cpp create mode 100644 include/occa/c/experimental/memoryPool.h create mode 100644 include/occa/core/memoryPool.hpp create mode 100644 include/occa/core/memoryPool.tpp create mode 100644 src/c/experimental/memoryPool.cpp create mode 100644 src/core/memoryPool.cpp create mode 100644 src/occa/internal/core/memoryPool.cpp create mode 100644 src/occa/internal/core/memoryPool.hpp create mode 100644 src/occa/internal/modes/cuda/memoryPool.cpp create mode 100644 src/occa/internal/modes/cuda/memoryPool.hpp create mode 100644 src/occa/internal/modes/dpcpp/memoryPool.cpp create mode 100644 src/occa/internal/modes/dpcpp/memoryPool.hpp create mode 100644 src/occa/internal/modes/hip/memoryPool.cpp create mode 100644 src/occa/internal/modes/hip/memoryPool.hpp create mode 100644 src/occa/internal/modes/metal/memoryPool.cpp create mode 100644 src/occa/internal/modes/metal/memoryPool.hpp create mode 100644 src/occa/internal/modes/opencl/memoryPool.cpp create mode 100644 src/occa/internal/modes/opencl/memoryPool.hpp create mode 100644 src/occa/internal/modes/serial/memoryPool.cpp create mode 100644 src/occa/internal/modes/serial/memoryPool.hpp create mode 100644 tests/src/c/memoryPool.cpp create mode 100644 tests/src/core/memoryPool.cpp diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9889d038a..af52a44a5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -178,6 +178,7 @@ jobs: if: ${{ matrix.useCMake && !matrix.useoneAPI }} run: | ctest --test-dir build --progress --output-on-failure --parallel 8 --schedule-random -E "examples_cpp_arrays-opencl|examples_cpp_for_loops-opencl|examples_cpp_generic_inline_kernel-opencl|examples_cpp_shared_memory-opencl|examples_cpp_nonblocking_streams-opencl|examples_cpp_for_loops-dpcpp|examples_cpp_arrays-dpcpp|examples_cpp_nonblocking_streams-dpcpp" + - name: Run CTests diff --git a/examples/cpp/17_memory_pool/CMakeLists.txt b/examples/cpp/17_memory_pool/CMakeLists.txt new file mode 100644 index 000000000..1a6e5c668 --- /dev/null +++ b/examples/cpp/17_memory_pool/CMakeLists.txt @@ -0,0 +1,4 @@ +compile_cpp_example(memory_pool main.cpp) + +add_custom_target(cpp_example_memory_pool_okl ALL COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/addVectors.okl addVectors.okl) +add_dependencies(examples_cpp_memory_pool cpp_example_memory_pool_okl) diff --git a/examples/cpp/17_memory_pool/Makefile b/examples/cpp/17_memory_pool/Makefile new file mode 100644 index 000000000..bf13a8b42 --- /dev/null +++ b/examples/cpp/17_memory_pool/Makefile @@ -0,0 +1,30 @@ + +PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) + +ifndef OCCA_DIR + include $(PROJ_DIR)/../../../scripts/build/Makefile +else + include ${OCCA_DIR}/scripts/build/Makefile +endif + +#---[ COMPILATION ]------------------------------- +headers = $(wildcard $(incPath)/*.hpp) $(wildcard $(incPath)/*.tpp) +sources = $(wildcard $(srcPath)/*.cpp) + +objects = $(subst $(srcPath)/,$(objPath)/,$(sources:.cpp=.o)) + +executables: ${PROJ_DIR}/main + +${PROJ_DIR}/main: $(objects) $(headers) ${PROJ_DIR}/main.cpp + $(compiler) $(compilerFlags) -o ${PROJ_DIR}/main $(flags) $(objects) ${PROJ_DIR}/main.cpp $(paths) $(linkerFlags) + @if which install_name_tool > /dev/null 2>&1; then \ + install_name_tool -add_rpath "${OCCA_DIR}/lib" ${PROJ_DIR}/main; \ + fi + +$(objPath)/%.o:$(srcPath)/%.cpp $(wildcard $(subst $(srcPath)/,$(incPath)/,$(<:.cpp=.hpp))) $(wildcard $(subst $(srcPath)/,$(incPath)/,$(<:.cpp=.tpp))) + $(compiler) $(compilerFlags) -o $@ $(flags) -c $(paths) $< + +clean: + rm -f $(objPath)/*; + rm -f ${PROJ_DIR}/main; +#================================================= diff --git a/examples/cpp/17_memory_pool/README.md b/examples/cpp/17_memory_pool/README.md new file mode 100644 index 000000000..813659ce4 --- /dev/null +++ b/examples/cpp/17_memory_pool/README.md @@ -0,0 +1,28 @@ +# Example: Memory Pool + +A example showing the basics of using OCCA's memory pools + +- Creating an OCCA memory +- Managing memory pool +- Reserving and using memory from memory pool + +# Compiling the Example + +```bash +make +``` + +## Usage + +``` +> ./main --help + +Usage: ./main [OPTIONS] + +Example using memory pools + +Options: + -d, --device Device properties (default: "{mode: 'Serial'}") + -h, --help Print usage + -v, --verbose Compile kernels in verbose mode +``` diff --git a/examples/cpp/17_memory_pool/addVectors.okl b/examples/cpp/17_memory_pool/addVectors.okl new file mode 100644 index 000000000..1842d9dea --- /dev/null +++ b/examples/cpp/17_memory_pool/addVectors.okl @@ -0,0 +1,8 @@ +@kernel void addVectors(const int entries, + const float *a, + const float *b, + float *ab) { + for (int i = 0; i < entries; ++i; @tile(4, @outer, @inner)) { + ab[i] = a[i] + b[i]; + } +} diff --git a/examples/cpp/17_memory_pool/main.cpp b/examples/cpp/17_memory_pool/main.cpp new file mode 100644 index 000000000..ef4929f95 --- /dev/null +++ b/examples/cpp/17_memory_pool/main.cpp @@ -0,0 +1,286 @@ +#include + +#include + +//---[ Internal Tools ]----------------- +// Note: These headers are not officially supported +// Please don't rely on it outside of the occa examples +#include +#include +//====================================== + +occa::json parseArgs(int argc, const char **argv); + +int main(int argc, const char **argv) { + occa::json args = parseArgs(argc, argv); + + int entries = 12; + + float *a = new float[entries]; + float *b = new float[entries]; + float *c = new float[entries]; + float *check = new float[entries]; + + for (int i = 0; i < entries; ++i) { + a[i] = 1*(i+1); + b[i] = 2*(i+1); + c[i] = 3*(i+1); + } + + //---[ Device Setup ]------------------------------------- + occa::device device( (std::string) args["options/device"]); + + //---[ Memory Pool Setup ]------------------------------------- + occa::json properties; + properties["verbose"] = args["options/verbose"]; + + occa::experimental::memoryPool memPool = device.createMemoryPool(properties); + + int alignment = memPool.alignment(); + + std::cout << "Mempool Creation: alignment = " << alignment << std::endl; + if (static_cast(entries) > alignment/sizeof(float)) { + std::cerr << "Example assumes vector lengths are less than mempool alignment." << std::endl; + throw 1; + } + + std::cout << "Memory pool: Size = " << memPool.size() << " bytes" + << ", Reserved = " << memPool.reserved() << " bytes" + << ", in " << memPool.numReservations() << " reservations" << std::endl; + + std::cout << "First reservation: " << entries*sizeof(float) << " bytes" << std::endl; + // Make a reservation from memory pool + // Memory pool is a single allocation, and consists of just o_ab + /* + |====o_a====| + */ + occa::memory o_a = memPool.reserve(entries); + + // Fill buffer + o_a.copyFrom(a); + + std::cout << "Memory pool: Size = " << memPool.size() << " bytes" + << ", Reserved = " << memPool.reserved() << " bytes" + << ", in " << memPool.numReservations() << " reservations" << std::endl; + + + // Unallocated occa::memory + occa::memory o_c; + + // New scope + { + std::cout << "Slicing Memory (no resize)" << std::endl; + /*Slicing o_ab will not trigger reallocation or + increase memoryPool's reservation size*/ + /* + |====o_a=====| + |a_h1=||a_h2=| + */ + occa::memory o_a_half1 = o_a.slice(0, entries/2); + occa::memory o_a_half2 = o_a.slice(entries/2); + + std::cout << "Memory pool: Size = " << memPool.size() << " bytes" + << ", Reserved = " << memPool.reserved() << " bytes" + << ", in " << memPool.numReservations() << " reservations" << std::endl; + + // Check the contents are what we expect + o_a_half1.copyTo(check); + for (int i = 0; i < entries/2; ++i) { + if (!occa::areBitwiseEqual(check[i], a[i])) { + throw 1; + } + } + + o_a_half2.copyTo(check); + for (int i = 0; i < entries/2; ++i) { + if (!occa::areBitwiseEqual(check[i], a[i+entries/2])) { + throw 1; + } + } + + std::cout << "Second reservation: " << entries*sizeof(float) << " bytes" << std::endl; + // Trigger a resize by requesting a new reservation: + /* + |====o_a=====||====o_b=====| + |a_h1=||a_h2=| + */ + occa::memory o_b = memPool.reserve(entries); + + // Fill buffer + o_b.copyFrom(b); + + + // Check o_a still has its data after resize + o_a.copyTo(check); + for (int i = 0; i < entries; ++i) { + if (!occa::areBitwiseEqual(check[i], a[i])) { + throw 1; + } + } + + o_a_half1.copyTo(check); + for (int i = 0; i < entries/2; ++i) { + if (!occa::areBitwiseEqual(check[i], a[i])) { + throw 1; + } + } + + o_a_half2.copyTo(check); + for (int i = 0; i < entries/2; ++i) { + if (!occa::areBitwiseEqual(check[i], a[i+entries/2])) { + throw 1; + } + } + + //Destroy slices + std::cout << "Destroy slices" << std::endl; + o_a_half1.free(); + o_a_half2.free(); + + std::cout << "Memory pool: Size = " << memPool.size() << " bytes" + << ", Reserved = " << memPool.reserved() << " bytes" + << ", in " << memPool.numReservations() << " reservations" << std::endl; + + std::cout << "Third reservation: " << entries*sizeof(float) << " bytes" << std::endl; + // Trigger another resize by reserving the outer-scope mem + /* + |====o_a=====||====o_b=====||====o_c=====| + |a_h1=||a_h2=| + */ + o_c = memPool.reserve(entries); + + // Fill buffer + o_c.copyFrom(c); + + std::cout << "Memory pool: Size = " << memPool.size() << " bytes" + << ", Reserved = " << memPool.reserved() << " bytes" + << ", in " << memPool.numReservations() << " reservations" << std::endl; + + // Check o_a and o_b have data after resize + o_a.copyTo(check); + for (int i = 0; i < entries; ++i) { + if (!occa::areBitwiseEqual(check[i], a[i])) { + throw 1; + } + } + + o_b.copyTo(check); + for (int i = 0; i < entries; ++i) { + if (!occa::areBitwiseEqual(check[i], b[i])) { + throw 1; + } + } + } + + std::cout << "Free second reservation" << std::endl; + // o_b leaves scope and is destroyed. This leaves a 'hole' in the memory pool + /* + |====o_a=====||------------||====o_c=====| + */ + std::cout << "Memory pool: Size = " << memPool.size() << " bytes" + << ", Reserved = " << memPool.reserved() << " bytes" + << ", in " << memPool.numReservations() << " reservations" << std::endl; + + std::cout << "Re-reserve " << entries*sizeof(float) << " bytes" << std::endl; + // Request a new reservation, should fit in the hole and not trigger resize: + /* + |====o_a=====||====o_b=====||====o_c=====| + */ + occa::memory o_b = memPool.reserve(entries); + + std::cout << "Memory pool: Size = " << memPool.size() << " bytes" + << ", Reserved = " << memPool.reserved() << " bytes" + << ", in " << memPool.numReservations() << " reservations" << std::endl; + + std::cout << "Free again" << std::endl; + // Freeing doesnt change the mempool size, only the reserved size + /* + |====o_a=====||------------||====o_c=====| + */ + o_b.free(); + + std::cout << "Reserve " << (entries + alignment/sizeof(float))*sizeof(float) + << " bytes" << std::endl; + // Requesting a new reservation that *doesn't* fit in the hole triggers a resize. + // The mempool is defragmented on resizing + /* + |====o_a====||====o_c=====||========o_b=========| + */ + o_b = memPool.reserve(entries + alignment/sizeof(float)); + + std::cout << "Memory pool: Size = " << memPool.size() << " bytes" + << ", Reserved = " << memPool.reserved() << " bytes" + << ", in " << memPool.numReservations() << " reservations" << std::endl; + + // Check o_a and o_c have data after resize + o_a.copyTo(check); + for (int i = 0; i < entries; ++i) { + if (!occa::areBitwiseEqual(check[i], a[i])) { + throw 1; + } + } + o_c.copyTo(check); + for (int i = 0; i < entries; ++i) { + if (!occa::areBitwiseEqual(check[i], c[i])) { + throw 1; + } + } + + std::cout << "Free third reservation" << std::endl; + // Free a reserved memory then "re-size to fit" + /* + |====o_a=====||------------||========o_b=========| + */ + o_c.free(); + + std::cout << "Shrink to fit" << std::endl; + /* + |====o_a=====||========o_b=========| + */ + memPool.shrinkToFit(); + + std::cout << "Memory pool: Size = " << memPool.size() << " bytes" + << ", Reserved = " << memPool.reserved() << " bytes" + << ", in " << memPool.numReservations() << " reservations" << std::endl; + + + std::cout << "Set aligment to " << 4*memPool.alignment() + << " bytes" << std::endl; + // Set the alignment of the memory pool (triggers a re-allcation) + memPool.setAlignment(4*memPool.alignment()); + + std::cout << "Memory pool: Size = " << memPool.size() << " bytes" + << ", Reserved = " << memPool.reserved() << " bytes" + << ", in " << memPool.numReservations() << " reservations" << std::endl; + + // Free host memory + delete [] a; + delete [] b; + delete [] c; + delete [] check; + + return 0; +} + +occa::json parseArgs(int argc, const char **argv) { + occa::cli::parser parser; + parser + .withDescription( + "Example using memory pool" + ) + .addOption( + occa::cli::option('d', "device", + "Device properties (default: \"{mode: 'Serial'}\")") + .withArg() + .withDefaultValue("{mode: 'Serial'}") + ) + .addOption( + occa::cli::option('v', "verbose", + "Compile kernels in verbose mode") + ); + + occa::json args = parser.parseArgs(argc, argv); + occa::settings()["kernel/verbose"] = args["options/verbose"]; + + return args; +} diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt index 9d0aec26f..51a3b10c7 100644 --- a/examples/cpp/CMakeLists.txt +++ b/examples/cpp/CMakeLists.txt @@ -11,6 +11,7 @@ add_subdirectory(11_native_cuda_kernels) add_subdirectory(12_native_opencl_kernels) add_subdirectory(13_openmp_interop) add_subdirectory(14_cuda_interop) +add_subdirectory(17_memory_pool) add_subdirectory(18_nonblocking_streams) add_subdirectory(20_native_dpcpp_kernel) diff --git a/include/occa/c/base.h b/include/occa/c/base.h index e61b6e2ab..80e170dea 100644 --- a/include/occa/c/base.h +++ b/include/occa/c/base.h @@ -73,6 +73,10 @@ occaMemory occaTypedWrapMemory(const void *ptr, occaJson props); //====================================== +//---[ MemoryPool ]--------------------- +occaMemoryPool occaCreateMemoryPool(occaJson props); +//====================================== + OCCA_END_EXTERN_C #endif diff --git a/include/occa/c/device.h b/include/occa/c/device.h index c9cdbf0f1..b7635fb6b 100644 --- a/include/occa/c/device.h +++ b/include/occa/c/device.h @@ -91,6 +91,11 @@ occaMemory occaDeviceTypedWrapMemory(occaDevice device, occaJson props); //====================================== +//---[ MemoryPool ]--------------------- +occaMemoryPool occaDeviceCreateMemoryPool(occaDevice device, + occaJson props); +//====================================== + OCCA_END_EXTERN_C #endif diff --git a/include/occa/c/experimental.h b/include/occa/c/experimental.h index cb77a746a..ba35b05e1 100644 --- a/include/occa/c/experimental.h +++ b/include/occa/c/experimental.h @@ -2,6 +2,7 @@ #define OCCA_C_EXPERIMENTAL_HEADER #include +#include #ifdef OCCA_JIT # undef OCCA_JIT diff --git a/include/occa/c/experimental/memoryPool.h b/include/occa/c/experimental/memoryPool.h new file mode 100644 index 000000000..276186d45 --- /dev/null +++ b/include/occa/c/experimental/memoryPool.h @@ -0,0 +1,42 @@ +#ifndef OCCA_C_MEMORYPOOL_HEADER +#define OCCA_C_MEMORYPOOL_HEADER + +#include +#include + +OCCA_START_EXTERN_C + +bool occaMemoryPoolIsInitialized(occaMemoryPool memoryPool); + +occaDevice occaMemoryPoolGetDevice(occaMemoryPool memoryPool); + +const char* occaMemoryPoolMode(occaMemoryPool memoryPool); + +occaJson occaMemoryPoolGetProperties(occaMemoryPool memoryPool); + +occaUDim_t occaMemoryPoolSize(occaMemoryPool memoryPool); + +occaUDim_t occaMemoryPoolReserved(occaMemoryPool memoryPool); + +occaUDim_t occaMemoryPoolNumReservations(occaMemoryPool memoryPool); + +occaUDim_t occaMemoryPoolAlignment(occaMemoryPool memoryPool); + +void occaMemoryPoolResize(occaMemoryPool memoryPool, + const occaUDim_t bytes); + +void occaMemoryPoolShrinkToFit(occaMemoryPool memoryPool); + +occaMemory occaMemoryPoolReserve(occaMemoryPool memoryPool, + const occaUDim_t bytes); + +occaMemory occaMemoryPoolTypedReserve(occaMemoryPool memoryPool, + const occaUDim_t entries, + const occaDtype dtype); + +void occaMemoryPoolSetAlignment(occaMemoryPool memoryPool, + const occaUDim_t alignment); + +OCCA_END_EXTERN_C + +#endif diff --git a/include/occa/c/types.h b/include/occa/c/types.h index b56ecf443..7ac7c35db 100644 --- a/include/occa/c/types.h +++ b/include/occa/c/types.h @@ -44,6 +44,7 @@ typedef occaType occaDevice; typedef occaType occaKernel; typedef occaType occaKernelBuilder; typedef occaType occaMemory; +typedef occaType occaMemoryPool; typedef occaType occaStream; typedef occaType occaStreamTag; @@ -77,6 +78,7 @@ extern const int OCCA_DEVICE; extern const int OCCA_KERNEL; extern const int OCCA_KERNELBUILDER; extern const int OCCA_MEMORY; +extern const int OCCA_MEMORYPOOL; extern const int OCCA_STREAM; extern const int OCCA_STREAMTAG; diff --git a/include/occa/core/base.hpp b/include/occa/core/base.hpp index 84320a350..010ab1c0a 100644 --- a/include/occa/core/base.hpp +++ b/include/occa/core/base.hpp @@ -43,6 +43,8 @@ namespace occa { void setStream(stream s); streamTag tagStream(); + + experimental::memoryPool createMemoryPool(const occa::json &props = occa::json()); //==================================== //---[ Kernel Functions ]------------- diff --git a/include/occa/core/device.hpp b/include/occa/core/device.hpp index 3d901ef76..0022d0f51 100644 --- a/include/occa/core/device.hpp +++ b/include/occa/core/device.hpp @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -713,6 +714,21 @@ namespace occa { const dim_t entries, const dtype_t &dtype, const occa::json &props = occa::json()); + + // |---[ MemoryPool ]------------------ + /** + * @startDoc{createMemoryPool} + * + * Description: + * Creates and returns a new [[memoryPool]] to reserve [[memory]]. + * + * Returns: + * Newly created [[memoryPool]] + * + * @endDoc + */ + experimental::memoryPool createMemoryPool(const occa::json &props = occa::json()); + // |=============================== }; diff --git a/include/occa/core/memoryPool.hpp b/include/occa/core/memoryPool.hpp new file mode 100644 index 000000000..9bf60f39e --- /dev/null +++ b/include/occa/core/memoryPool.hpp @@ -0,0 +1,261 @@ +#ifndef OCCA_CORE_MEMORYPOOL_HEADER +#define OCCA_CORE_MEMORYPOOL_HEADER + +#include + +namespace occa { + class modeBuffer_t; class buffer; + class modeMemory_t; class memory; + class modeDevice_t; class device; + class modeMemoryPool_t; + + namespace experimental { + + class memoryPool : public gc::ringEntry_t { + friend class occa::modeMemoryPool_t; + + private: + modeMemoryPool_t *modeMemoryPool; + + public: + memoryPool(); + memoryPool(modeMemoryPool_t *modeMemoryPool_); + + memoryPool(const memoryPool &m); + memoryPool& operator = (const memoryPool &m); + ~memoryPool(); + + private: + void assertInitialized() const; + void setModeMemoryPool(modeMemoryPool_t *modeMemoryPool_); + void removeMemoryPoolRef(); + + public: + void dontUseRefs(); + + /** + * @startDoc{isInitialized} + * + * Description: + * Check whether the [[memoryPool]] has been intialized. + * + * Returns: + * Returns `true` if the [[memoryPool]] has been built successfully + * + * @endDoc + */ + bool isInitialized() const; + + memoryPool& swap(memoryPool &m); + + modeMemoryPool_t* getModeMemoryPool() const; + modeDevice_t* getModeDevice() const; + + /** + * @startDoc{getDevice} + * + * Description: + * Returns the [[device]] used to build the [[memoryPool]]. + * + * Returns: + * The [[device]] used to build the [[memoryPool]] + * + * @endDoc + */ + occa::device getDevice() const; + + /** + * @startDoc{mode} + * + * Description: + * Returns the mode of the [[device]] used to build the [[memoryPool]]. + * + * Returns: + * The `mode` string, such as `"Serial"`, `"CUDA"`, or `"HIP"`. + * + * @endDoc + */ + const std::string& mode() const; + + /** + * @startDoc{properties} + * + * Description: + * Get the properties used to build the [[memoryPool]]. + * + * Description: + * Returns the properties used to build the [[memoryPool]]. + * + * @endDoc + */ + const occa::json& properties() const; + + /** + * @startDoc{size} + * + * Description: + * Get the byte size of the allocated memoryPool + * + * @endDoc + */ + udim_t size() const; + + /** + * @startDoc{reserved} + * + * Description: + * Get the byte size of the memoryPool currently reserved + * + * @endDoc + */ + udim_t reserved() const; + + /** + * @startDoc{numReservations} + * + * Description: + * Get the number of currently active reservations in the memoryPool + * + * @endDoc + */ + udim_t numReservations() const; + + /** + * @startDoc{alignment} + * + * Description: + * Get the byte size of the memoryPool alignment + * + * @endDoc + */ + udim_t alignment() const; + + /** + * @startDoc{operator_equals[0]} + * + * Description: + * Compare if two memoryPool objects have the same references. + * + * Returns: + * If the references are the same, this returns `true` otherwise `false`. + * + * @endDoc + */ + bool operator == (const occa::experimental::memoryPool &other) const; + + /** + * @startDoc{operator_equals[1]} + * + * Description: + * Compare if two memoryPool objects have different references. + * + * Returns: + * If the references are different, this returns `true` otherwise `false`. + * + * @endDoc + */ + bool operator != (const occa::experimental::memoryPool &other) const; + + /** + * @startDoc{resize} + * + * Description: + * Resize the underlying device memory buffer in the memoryPool. + * An error will be thrown if the currently reserved space in the memoryPool + * is larger than bytes. + * + * Arguments: + * bytes: + * The size of the memory pool to allocate. + * @endDoc + */ + void resize(const udim_t bytes); + + /** + * @startDoc{shrinkToFit} + * + * Description: + * Resize the underlying device memory buffer in the memoryPool to fit only + * The currently active reservations. + * + * @endDoc + */ + void shrinkToFit(); + + /** + * @startDoc{free} + * + * Description: + * Free the device memoryPool. + * Calling [[memoryPool.isInitialized]] will return `false` now. + * + * @endDoc + */ + void free(); + + // |=============================== + + // |---[ Memory ]------------------ + /** + * @startDoc{reserve[0]} + * + * Description: + * Reserves memory on the device from memory pool and returns the [[memory]] handle. + * + * Overloaded Description: + * Uses the templated type to determine the type and bytes. + * + * Arguments: + * entries: + * The length of the allocated memory + * + * Returns: + * The reserved [[memory]] + * + * @endDoc + */ + template + occa::memory reserve(const dim_t entries); + + /** + * @startDoc{reserve[1]} + * + * Overloaded Description: + * Same but takes a [[dtype_t]] rather than a template parameter. + * + * Arguments: + * entries: + * The length of the allocated memory + * dtype: + * The [[dtype_t]] of what will be allocated, which defines the length of each entry + * + * Returns: + * The reserved [[memory]] + * + * @endDoc + */ + occa::memory reserve(const dim_t entries, + const dtype_t &dtype);\ + + /** + * @startDoc{setAlignment} + * + * Description: + * Set the buffer aligment of the memoryPool. + * May trigger a re-allocation of the memory pool if there + * are currently active reservations + * + * Arguments: + * alignment: + * The size of the alignment in bytes. + * @endDoc + */ + void setAlignment(const udim_t alignment); + }; + + } +} + +#include "memoryPool.tpp" + +#endif diff --git a/include/occa/core/memoryPool.tpp b/include/occa/core/memoryPool.tpp new file mode 100644 index 000000000..6907ca4c3 --- /dev/null +++ b/include/occa/core/memoryPool.tpp @@ -0,0 +1,8 @@ +namespace occa { +namespace experimental { + template + occa::memory memoryPool::reserve(const dim_t entries) { + return reserve(entries, occa::dtype::get()); + } +} +} diff --git a/src/c/base.cpp b/src/c/base.cpp index 171d8695c..5ce2b82a8 100644 --- a/src/c/base.cpp +++ b/src/c/base.cpp @@ -195,4 +195,18 @@ occaMemory occaTypedWrapMemory(const void *ptr, } //====================================== +//---[ MemoryPool ]--------------------- +occaMemoryPool occaCreateMemoryPool(occaJson props) { + occa::experimental::memoryPool memPool; + if (occa::c::isDefault(props)) { + memPool = occa::createMemoryPool(); + } else { + memPool = occa::createMemoryPool(occa::c::json(props)); + } + memPool.dontUseRefs(); + + return occa::c::newOccaType(memPool); +} +//====================================== + OCCA_END_EXTERN_C diff --git a/src/c/device.cpp b/src/c/device.cpp index 0f46b0359..81a8dc4a0 100644 --- a/src/c/device.cpp +++ b/src/c/device.cpp @@ -254,4 +254,21 @@ occaMemory occaDeviceTypedWrapMemory(occaDevice device, } //====================================== +//---[ MemoryPool ]--------------------- +occaMemoryPool occaDeviceCreateMemoryPool(occaDevice device, + occaJson props) { + occa::device device_ = occa::c::device(device); + + occa::experimental::memoryPool memoryPool; + if (occa::c::isDefault(props)) { + memoryPool = device_.createMemoryPool(); + } else { + memoryPool = device_.createMemoryPool(occa::c::json(props)); + } + memoryPool.dontUseRefs(); + + return occa::c::newOccaType(memoryPool); +} +//====================================== + OCCA_END_EXTERN_C diff --git a/src/c/experimental/memoryPool.cpp b/src/c/experimental/memoryPool.cpp new file mode 100644 index 000000000..a41db35c7 --- /dev/null +++ b/src/c/experimental/memoryPool.cpp @@ -0,0 +1,74 @@ +#include +#include +#include +#include + +OCCA_START_EXTERN_C + +bool occaMemoryPoolIsInitialized(occaMemoryPool memoryPool) { + return occa::c::memoryPool(memoryPool).isInitialized(); +} + +occaDevice occaMemoryPoolGetDevice(occaMemoryPool memoryPool) { + return occa::c::newOccaType( + occa::c::memoryPool(memoryPool).getDevice() + ); +} + +occaJson occaMemoryPoolGetProperties(occaMemoryPool memoryPool) { + return occa::c::newOccaType( + occa::c::memoryPool(memoryPool).properties(), + false + ); +} + +occaUDim_t occaMemoryPoolSize(occaMemoryPool memoryPool) { + return occa::c::memoryPool(memoryPool).size(); +} + +occaUDim_t occaMemoryPoolReserved(occaMemoryPool memoryPool) { + return occa::c::memoryPool(memoryPool).reserved(); +} + +occaUDim_t occaMemoryPoolNumReservations(occaMemoryPool memoryPool) { + return occa::c::memoryPool(memoryPool).numReservations(); +} + +occaUDim_t occaMemoryPoolAlignment(occaMemoryPool memoryPool) { + return occa::c::memoryPool(memoryPool).alignment(); +} + +void occaMemoryPoolResize(occaMemoryPool memoryPool, + const occaUDim_t bytes) { + occa::c::memoryPool(memoryPool).resize(bytes); +} + +void occaMemoryPoolShrinkToFit(occaMemoryPool memoryPool) { + occa::c::memoryPool(memoryPool).shrinkToFit(); +} + +occaMemory occaMemoryPoolReserve(occaDevice device, + const occaUDim_t bytes) { + return occaMemoryPoolTypedReserve(device, + bytes, + occaDtypeByte); +} + +occaMemory occaMemoryPoolTypedReserve(occaMemoryPool memoryPool, + const occaUDim_t entries, + const occaDtype dtype) { + occa::experimental::memoryPool memoryPool_ = occa::c::memoryPool(memoryPool); + const occa::dtype_t &dtype_ = occa::c::dtype(dtype); + + occa::memory memory = memoryPool_.reserve(entries, dtype_); + memory.dontUseRefs(); + + return occa::c::newOccaType(memory); +} + +void occaMemoryPoolSetAlignment(occaMemoryPool memoryPool, + const occaUDim_t alignment) { + occa::c::memoryPool(memoryPool).setAlignment(alignment); +} + +OCCA_END_EXTERN_C diff --git a/src/core/base.cpp b/src/core/base.cpp index 29de088cd..784dfc295 100644 --- a/src/core/base.cpp +++ b/src/core/base.cpp @@ -77,6 +77,10 @@ namespace occa { return getDevice().tagStream(); } + experimental::memoryPool createMemoryPool(const occa::json &props) { + return getDevice().createMemoryPool(props); + } + //---[ Kernel Functions ]------------- kernel buildKernel(const std::string &filename, const std::string &kernelName, diff --git a/src/core/device.cpp b/src/core/device.cpp index e4fd62e68..a11c71612 100644 --- a/src/core/device.cpp +++ b/src/core/device.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -522,6 +523,16 @@ namespace occa { return mem; } + + memoryPool device::createMemoryPool(const occa::json &props) { + assertInitialized(); + + occa::json memProps = memoryProperties(props); + + memoryPool memPool(modeDevice->createMemoryPool(memProps)); + + return memPool; + } // |================================= template <> diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 03fbf7ccd..d421485a8 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -327,12 +327,15 @@ namespace occa { void memory::free() { if (modeMemory == NULL) return; - modeMemory->free(); + delete modeMemory; + modeMemory = nullptr; } void memory::detach() { if (modeMemory == NULL) return; modeMemory->detach(); + delete modeMemory; + modeMemory = nullptr; } memory null; diff --git a/src/core/memoryPool.cpp b/src/core/memoryPool.cpp new file mode 100644 index 000000000..cab3f988f --- /dev/null +++ b/src/core/memoryPool.cpp @@ -0,0 +1,182 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace occa { + memoryPool::memoryPool() : + modeMemoryPool(NULL) {} + + memoryPool::memoryPool(modeMemoryPool_t *modeMemoryPool_) : + modeMemoryPool(NULL) { + setModeMemoryPool(modeMemoryPool_); + } + + memoryPool::memoryPool(const memoryPool &m) : + modeMemoryPool(NULL) { + setModeMemoryPool(m.modeMemoryPool); + } + + memoryPool& memoryPool::operator = (const memoryPool &m) { + setModeMemoryPool(m.modeMemoryPool); + return *this; + } + + memoryPool::~memoryPool() { + removeMemoryPoolRef(); + } + + void memoryPool::assertInitialized() const { + OCCA_ERROR("MemoryPool not initialized or has been freed", + modeMemoryPool != NULL); + } + + void memoryPool::setModeMemoryPool(modeMemoryPool_t *modeMemoryPool_) { + if (modeMemoryPool != modeMemoryPool_) { + removeMemoryPoolRef(); + modeMemoryPool = modeMemoryPool_; + if (modeMemoryPool) { + modeMemoryPool->addMemoryPoolRef(this); + } + } + } + + void memoryPool::removeMemoryPoolRef() { + if (!modeMemoryPool) { + return; + } + modeMemoryPool->removeMemoryPoolRef(this); + if (modeMemoryPool->modeMemoryPool_t::needsFree()) { + delete modeMemoryPool; + modeMemoryPool = NULL; + } + } + + void memoryPool::dontUseRefs() { + if (modeMemoryPool) { + modeMemoryPool->modeMemoryPool_t::dontUseRefs(); + } + } + + bool memoryPool::isInitialized() const { + return (modeMemoryPool != NULL); + } + + memoryPool& memoryPool::swap(memoryPool &m) { + modeMemoryPool_t *modeMemoryPool_ = modeMemoryPool; + modeMemoryPool = m.modeMemoryPool; + m.modeMemoryPool = modeMemoryPool_; + return *this; + } + + modeMemoryPool_t* memoryPool::getModeMemoryPool() const { + return modeMemoryPool; + } + + modeDevice_t* memoryPool::getModeDevice() const { + return (modeMemoryPool + ? modeMemoryPool->modeDevice + : nullptr); + } + + occa::device memoryPool::getDevice() const { + return occa::device(modeMemoryPool + ? modeMemoryPool->modeDevice + : NULL); + } + + const std::string& memoryPool::mode() const { + static const std::string noMode = "No Mode"; + return (modeMemoryPool + ? modeMemoryPool->modeDevice->mode + : noMode); + } + + const occa::json& memoryPool::properties() const { + static const occa::json noProperties; + return (modeMemoryPool + ? modeMemoryPool->properties + : noProperties); + } + + udim_t memoryPool::size() const { + if (modeMemoryPool == NULL) { + return 0; + } + return modeMemoryPool->size; + } + + udim_t memoryPool::reserved() const { + if (modeMemoryPool == NULL) { + return 0; + } + return modeMemoryPool->reserved; + } + + udim_t memoryPool::numReservations() const { + if (modeMemoryPool == NULL) { + return 0; + } + return modeMemoryPool->numReservations(); + } + + udim_t memoryPool::alignment() const { + if (modeMemoryPool == NULL) { + return 0; + } + return modeMemoryPool->alignment; + } + + bool memoryPool::operator == (const occa::memoryPool &other) const { + return (modeMemoryPool == other.modeMemoryPool); + } + + bool memoryPool::operator != (const occa::memoryPool &other) const { + return (modeMemoryPool != other.modeMemoryPool); + } + + void memoryPool::resize(const udim_t bytes) { + assertInitialized(); + modeMemoryPool->resize(bytes); + } + + void memoryPool::shrinkToFit() { + resize(reserved()); + } + + void memoryPool::free() { + if (modeMemoryPool == NULL) return; + delete modeMemoryPool; + } + + memory memoryPool::reserve(const dim_t entries, + const dtype_t &dtype) { + assertInitialized(); + + if (entries == 0) { + return memory(); + } + + const dim_t bytes = entries * dtype.bytes(); + OCCA_ERROR("Trying to reserve negative bytes (" << bytes << ")", + bytes >= 0); + + memory mem(modeMemoryPool->reserve(bytes)); + mem.setDtype(dtype); + + return mem; + } + + template <> + memory memoryPool::reserve(const dim_t entries) { + return reserve(entries, dtype::byte); + } + + void memoryPool::setAlignment(const udim_t alignment) { + assertInitialized(); + modeMemoryPool->setAlignment(alignment); + } +} diff --git a/src/occa/internal/c/types.cpp b/src/occa/internal/c/types.cpp index 7fdfea399..df5c0d027 100644 --- a/src/occa/internal/c/types.cpp +++ b/src/occa/internal/c/types.cpp @@ -2,6 +2,7 @@ #include #include +#include namespace occa { namespace c { @@ -246,6 +247,21 @@ namespace occa { return oType; } + occaType newOccaType(occa::experimental::memoryPool memoryPool) { + occa::modeMemoryPool_t *modeMemoryPool = memoryPool.getModeMemoryPool(); + if (!modeMemoryPool) { + return occaUndefined; + } + + occaType oType; + oType.magicHeader = OCCA_C_TYPE_MAGIC_HEADER; + oType.type = typeType::memoryPool; + oType.bytes = sizeof(void*); + oType.value.ptr = (char*) modeMemoryPool; + oType.needsFree = false; + return oType; + } + occaType newOccaType(occa::stream stream) { occa::modeStream_t *modeStream = stream.getModeStream(); if (!modeStream) { @@ -370,6 +386,15 @@ namespace occa { return occa::memory((occa::modeMemory_t*) value.value.ptr); } + occa::experimental::memoryPool memoryPool(occaType value) { + if (occaIsUndefined(value)) { + return occa::experimental::memoryPool(); + } + OCCA_ERROR("Input is not an occaMemoryPool", + value.type == typeType::memoryPool); + return occa::experimental::memoryPool((occa::modeMemoryPool_t*) value.value.ptr); + } + occa::stream stream(occaType value) { if (occaIsUndefined(value)) { return occa::stream(); @@ -619,6 +644,7 @@ const int OCCA_DEVICE = occa::c::typeType::device; const int OCCA_KERNEL = occa::c::typeType::kernel; const int OCCA_KERNELBUILDER = occa::c::typeType::kernelBuilder; const int OCCA_MEMORY = occa::c::typeType::memory; +const int OCCA_MEMORYPOOL = occa::c::typeType::memoryPool; const int OCCA_STREAM = occa::c::typeType::stream; const int OCCA_STREAMTAG = occa::c::typeType::streamTag; @@ -777,6 +803,10 @@ void occaFree(occaType *value) { occa::c::memory(valueRef).free(); break; } + case occa::c::typeType::memoryPool: { + occa::c::memoryPool(valueRef).free(); + break; + } case occa::c::typeType::stream: { occa::c::stream(valueRef).free(); break; @@ -947,6 +977,20 @@ void occaPrintTypeInfo(occaType value) { break; } + case occa::c::typeType::memoryPool: { + info["type"] = "memoryPool"; + info["value"] = (void*) value.value.ptr; + + occa::experimental::memoryPool memPool = occa::c::memoryPool(value); + if (memPool.isInitialized()) { + info["mode"] = memPool.mode(); + info["props"] = memPool.properties(); + } else { + info["initialized"] = false; + } + + break; + } case occa::c::typeType::stream: { info["type"] = "stream"; info["value"] = (void*) value.value.ptr; diff --git a/src/occa/internal/c/types.hpp b/src/occa/internal/c/types.hpp index 7c2b591f0..9524413e9 100644 --- a/src/occa/internal/c/types.hpp +++ b/src/occa/internal/c/types.hpp @@ -37,12 +37,13 @@ namespace occa { static const int kernel = 18; static const int kernelBuilder = 19; static const int memory = 20; - static const int stream = 21; - static const int streamTag = 22; + static const int memoryPool = 21; + static const int stream = 22; + static const int streamTag = 23; - static const int dtype = 23; - static const int scope = 24; - static const int json = 25; + static const int dtype = 24; + static const int scope = 25; + static const int json = 26; } occaType defaultOccaType(); @@ -97,6 +98,7 @@ namespace occa { occaType newOccaType(occa::device device); occaType newOccaType(occa::kernel kernel); occaType newOccaType(occa::memory memory); + occaType newOccaType(occa::experimental::memoryPool memoryPool); occaType newOccaType(occa::stream stream); occaType newOccaType(occa::streamTag streamTag); @@ -116,6 +118,7 @@ namespace occa { occa::kernel kernel(occaType value); occa::kernelBuilder kernelBuilder(occaType value); occa::memory memory(occaType value); + occa::experimental::memoryPool memoryPool(occaType value); occa::stream stream(occaType value); occa::streamTag streamTag(occaType value); diff --git a/src/occa/internal/core/buffer.cpp b/src/occa/internal/core/buffer.cpp index bc4f3177e..ac1d299d4 100644 --- a/src/occa/internal/core/buffer.cpp +++ b/src/occa/internal/core/buffer.cpp @@ -37,10 +37,6 @@ namespace occa { isWrapped = false; } - void modeBuffer_t::dontUseRefs() { - modeMemoryRing.dontUseRefs(); - } - void modeBuffer_t::addModeMemoryRef(modeMemory_t *mem) { modeMemoryRing.addRef(mem); } diff --git a/src/occa/internal/core/buffer.hpp b/src/occa/internal/core/buffer.hpp index 39ec5657d..57b16b0a5 100644 --- a/src/occa/internal/core/buffer.hpp +++ b/src/occa/internal/core/buffer.hpp @@ -26,23 +26,20 @@ namespace occa { udim_t size_, const occa::json &json_); - void dontUseRefs(); - void addModeMemoryRef(modeMemory_t *mem); - void removeModeMemoryRef(modeMemory_t *mem); - bool needsFree() const; //---[ Virtual Methods ]------------ virtual ~modeBuffer_t(); - virtual void malloc(udim_t bytes) = 0; + virtual bool needsFree() const; + virtual void addModeMemoryRef(modeMemory_t *mem); + virtual void removeModeMemoryRef(modeMemory_t *mem); - virtual void wrapMemory(const void *ptr, - const udim_t bytes) = 0; + virtual void malloc(udim_t bytes) {}; virtual modeMemory_t* slice(const dim_t offset_, const udim_t bytes) = 0; - virtual void detach() = 0; + virtual void detach() {}; }; } diff --git a/src/occa/internal/core/device.hpp b/src/occa/internal/core/device.hpp index 7b5d1b4d8..7d4dcb381 100644 --- a/src/occa/internal/core/device.hpp +++ b/src/occa/internal/core/device.hpp @@ -118,6 +118,8 @@ namespace occa { const udim_t bytes, const occa::json &props) = 0; + virtual modeMemoryPool_t* createMemoryPool(const occa::json &props)=0; + virtual udim_t memorySize() const = 0; // |=============================== //================================== diff --git a/src/occa/internal/core/memory.cpp b/src/occa/internal/core/memory.cpp index f2671e7ba..4f98132ad 100644 --- a/src/occa/internal/core/memory.cpp +++ b/src/occa/internal/core/memory.cpp @@ -32,7 +32,6 @@ namespace occa { void modeMemory_t::dontUseRefs() { memoryRing.dontUseRefs(); - if (modeBuffer) modeBuffer->dontUseRefs(); } void modeMemory_t::addMemoryRef(memory *mem) { @@ -44,29 +43,19 @@ namespace occa { } void modeMemory_t::removeModeMemoryRef() { - if (!modeBuffer) { - return; - } + if (modeBuffer == NULL) return; + modeBuffer->removeModeMemoryRef(this); - if (modeBuffer->modeBuffer_t::needsFree()) { - free(); + + if (modeBuffer->needsFree()) { + delete modeBuffer; } + modeBuffer = NULL; } void modeMemory_t::detach() { if (modeBuffer == NULL) return; - modeBuffer->detach(); - - //deleting the modeBuffer deletes all - // the modeMemory_t slicing it, and NULLs - // their wrappers - delete modeBuffer; - } - - void modeMemory_t::free() { - if (modeBuffer == NULL) return; - delete modeBuffer; } bool modeMemory_t::needsFree() const { @@ -74,7 +63,9 @@ namespace occa { } modeDevice_t* modeMemory_t::getModeDevice() const { - return modeBuffer->modeDevice; + return (modeBuffer + ? modeBuffer->modeDevice + : nullptr); } const occa::json& modeMemory_t::properties() const { diff --git a/src/occa/internal/core/memory.hpp b/src/occa/internal/core/memory.hpp index bf939183f..2f288efba 100644 --- a/src/occa/internal/core/memory.hpp +++ b/src/occa/internal/core/memory.hpp @@ -36,7 +36,6 @@ namespace occa { modeMemory_t* slice(const dim_t offset_, const udim_t bytes); - void free(); void detach(); //---[ Virtual Methods ]------------ diff --git a/src/occa/internal/core/memoryPool.cpp b/src/occa/internal/core/memoryPool.cpp new file mode 100644 index 000000000..44335cede --- /dev/null +++ b/src/occa/internal/core/memoryPool.cpp @@ -0,0 +1,347 @@ +#include +#include +#include +#include + +namespace occa { + + using experimental::memoryPool; + + modeMemoryPool_t::modeMemoryPool_t(modeDevice_t *modeDevice_, + const occa::json &properties_) : + modeBuffer_t(modeDevice_, 0, properties_), + alignment(128), + reserved(0), + buffer(nullptr) { + verbose = properties_.get("verbose", false); + } + + modeMemoryPool_t::~modeMemoryPool_t() { + // NULL all wrappers + while (memoryPoolRing.head) { + memoryPool *memPool = (memoryPool*) memoryPoolRing.head; + memoryPoolRing.removeRef(memPool); + memPool->modeMemoryPool = NULL; + } + if (buffer) delete buffer; + size=0; + } + + void modeMemoryPool_t::dontUseRefs() { + memoryPoolRing.dontUseRefs(); + } + + void modeMemoryPool_t::addMemoryPoolRef(memoryPool *memPool) { + memoryPoolRing.addRef(memPool); + } + + void modeMemoryPool_t::removeMemoryPoolRef(memoryPool *memPool) { + memoryPoolRing.removeRef(memPool); + } + + void modeMemoryPool_t::addModeMemoryRef(modeMemory_t *mem) { + modeMemoryRing.addRef(mem); + /*Find how much of this mem is a new reservation*/ + dim_t lo = (mem->offset / alignment) * alignment; //Round down to alignment + dim_t hi = ((mem->offset + mem->size + alignment - 1) + / alignment) * alignment; //Round up + for (modeMemory_t* m : reservations) { + const dim_t mlo = (m->offset / alignment) * alignment; + const dim_t mhi = ((m->offset + m->size + alignment - 1) + / alignment) * alignment; + if (mlo >= hi) break; + if (mhi <= lo) continue; + + if (mlo <= lo && mhi >= hi) { + hi = lo; + } else { + hi = std::min(hi, mhi); + lo = std::max(lo, mlo); + } + if (lo == hi) break; + } + /*Add this mem to the reservation list*/ + reservations.emplace(mem); + reserved += hi-lo; + } + + void modeMemoryPool_t::removeModeMemoryRef(modeMemory_t *mem) { + modeMemoryRing.removeRef(mem); + + /*Remove this mem from the reservation list*/ + auto pos = reservations.find(mem); + reservations.erase(pos); + + /*Find how much of this mem is removed from reserved space*/ + dim_t lo = (mem->offset / alignment) * alignment; //Round down to alignment + dim_t hi = ((mem->offset + mem->size + alignment - 1) + / alignment) * alignment; //Round up + for (modeMemory_t* m : reservations) { + const dim_t mlo = (m->offset / alignment) * alignment; + const dim_t mhi = ((m->offset + m->size + alignment - 1) + / alignment) * alignment; + if (mlo >= hi) break; + if (mhi <= lo) continue; + + if (mlo <= lo && mhi >= hi) { + hi = lo; + } else { + hi = std::min(hi, mhi); + lo = std::max(lo, mlo); + } + if (lo == hi) break; + } + reserved -= hi-lo; + } + + bool modeMemoryPool_t::needsFree() const { + return memoryPoolRing.needsFree(); + } + + udim_t modeMemoryPool_t::numReservations() const { + return reservations.size(); + } + + modeMemory_t* modeMemoryPool_t::reserve(const udim_t bytes) { + + const udim_t alignedBytes = ((bytes + alignment - 1) / alignment) * alignment; + + /*If pool is too small, resize and put the new reservation at the end*/ + if (reserved + bytes > size) { + resize(reserved + alignedBytes); + return slice(reserved, bytes); + } + + /*If pool is empty, put reservation at the beginning*/ + if (reservations.size()==0) { + return slice(0, bytes); + } + + /*Look for a unreserved region which fits request*/ + dim_t offset = 0; + for (modeMemory_t* m : reservations) { + const dim_t mlo = m->offset; + const dim_t mhi = ((m->offset + m->size + alignment - 1) + / alignment) * alignment; //Round up upper limit + if (mlo >= static_cast(offset + bytes)) break; /*Found a suitable empty space*/ + + offset = std::max(offset, mhi); /*Shift the potential region*/ + } + + if (offset + bytes <= size) { + return slice(offset, bytes); + } else { + resize(reserved + alignedBytes); + return slice(reserved, bytes); + } + } + + void modeMemoryPool_t::resize(const udim_t bytes) { + + OCCA_ERROR("Cannot resize memoryPool below current usage" + "(reserved: " << reserved << ", bytes: " << bytes << ")", + reserved <= bytes); + + if (size == bytes) return; /*Nothing to do*/ + + const udim_t alignedBytes = ((bytes + alignment - 1) / alignment) * alignment; + + if (verbose) { + io::stdout << "MemoryPool: Resizing to " << alignedBytes << " bytes\n"; + } + + if (reservations.size() == 0) { + /* + If there are no outstanding reservations, + destroy the allocation and re-make it + */ + if (buffer) delete buffer; + + buffer = makeBuffer(); + buffer->malloc(alignedBytes); + size = alignedBytes; + + modeDevice->bytesAllocated += alignedBytes; + modeDevice->maxBytesAllocated = std::max( + modeDevice->maxBytesAllocated, modeDevice->bytesAllocated + ); + + } else { + /* + There are currently reservations. + Make a new allocation and migrate reserved space to new allocation + packing the space in the process + */ + modeBuffer_t* newBuffer = makeBuffer(); + newBuffer->malloc(alignedBytes); + + modeDevice->bytesAllocated += alignedBytes; + modeDevice->maxBytesAllocated = std::max( + modeDevice->maxBytesAllocated, modeDevice->bytesAllocated + ); + + /*Loop through the reservation list*/ + auto it = reservations.begin(); + modeMemory_t* m = *it; + dim_t lo = m->offset; /*Start point of current block*/ + dim_t hi = lo + m->size; /*End point of current block*/ + dim_t offset = 0; + udim_t newReserved = 0; + setPtr(m, newBuffer, offset); + do { + + it++; + + if (it == reservations.end()) { + /*If this reservation is the last one, copy the block and we're done*/ + memcpy(newBuffer, offset, buffer, lo, hi - lo); + newReserved += ((hi - lo + alignment - 1) / alignment) * alignment; + } else { + /*Look at next reservation*/ + m = *it; + const dim_t mlo = m->offset; + const dim_t mhi = m->offset + m->size; + if (mlo > hi) { + /* + If the start point of the next reservation is in a new block + copy the last block to the new allocation + */ + memcpy(newBuffer, offset, buffer, lo, hi - lo); + const udim_t reservationSize = ((hi - lo + alignment - 1) / alignment) * alignment; + newReserved += reservationSize; + + /*Increment offset, and track start/end of current block*/ + offset += reservationSize; + lo = mlo; + hi = mhi; + } else { + /* + Reservation is in the same block. + Extend end point of current block + */ + hi = std::max(hi, mhi); + } + /*Update the buffer of this reservation*/ + setPtr(m, newBuffer, m->offset - (lo - offset)); + } + } while (it != reservations.end()); + + /*Clean up old buffer*/ + delete buffer; + + buffer = newBuffer; + size = alignedBytes; + reserved = newReserved; + } + } + + void modeMemoryPool_t::setAlignment(const udim_t newAlignment) { + + OCCA_ERROR("Cannot set memoryPool alignment to zero bytes", + newAlignment != 0); + + if (alignment == newAlignment) return; /*Nothing to do*/ + + if (reservations.size() != 0) { + /* + There are currently reservations. + Figure out the size of the new buffer needed + */ + /*Loop through the reservation list*/ + auto it = reservations.begin(); + modeMemory_t* m = *it; + dim_t lo = m->offset; /*Start point of current block*/ + dim_t hi = lo + m->size; /*End point of current block*/ + udim_t newReserved = 0; + do { + it++; + if (it == reservations.end()) { + newReserved += ((hi - lo + newAlignment - 1) / newAlignment) * newAlignment; + } else { + /*Look at next reservation*/ + m = *it; + const dim_t mlo = m->offset; + const dim_t mhi = m->offset + m->size; + if (mlo > hi) { + /* + If the start point of the next reservation is in a new block + */ + const udim_t reservationSize = ((hi - lo + newAlignment - 1) / newAlignment) * newAlignment; + newReserved += reservationSize; + + /*Track start/end of current block*/ + lo = mlo; + hi = mhi; + } else { + /* + Reservation is in the same block. + Extend end point of current block + */ + hi = std::max(hi, mhi); + } + } + } while (it != reservations.end()); + + /*Make a new buffer*/ + modeBuffer_t* newBuffer = makeBuffer(); + newBuffer->malloc(newReserved); + + modeDevice->bytesAllocated += newReserved; + modeDevice->maxBytesAllocated = std::max( + modeDevice->maxBytesAllocated, modeDevice->bytesAllocated + ); + + /*Loop through the reservation list and migrate to new alignment*/ + it = reservations.begin(); + m = *it; + lo = m->offset; /*Start point of current block*/ + hi = lo + m->size; /*End point of current block*/ + dim_t offset = 0; + setPtr(m, newBuffer, offset); + do { + + it++; + + if (it == reservations.end()) { + /*If this reservation is the last one, copy the block and we're done*/ + memcpy(newBuffer, offset, buffer, lo, hi - lo); + } else { + /*Look at next reservation*/ + m = *it; + const dim_t mlo = m->offset; + const dim_t mhi = m->offset + m->size; + if (mlo > hi) { + /* + If the start point of the next reservation is in a new block + copy the last block to the new allocation + */ + memcpy(newBuffer, offset, buffer, lo, hi - lo); + const udim_t reservationSize = ((hi - lo + newAlignment - 1) / newAlignment) * newAlignment; + + /*Increment offset, and track start/end of current block*/ + offset += reservationSize; + lo = mlo; + hi = mhi; + } else { + /* + Reservation is in the same block. + Extend end point of current block + */ + hi = std::max(hi, mhi); + } + /*Update the buffer of this reservation*/ + setPtr(m, newBuffer, m->offset - (lo - offset)); + } + } while (it != reservations.end()); + + /*Clean up old buffer*/ + delete buffer; + + buffer = newBuffer; + size = newReserved; + reserved = newReserved; + } + + alignment = newAlignment; + } +} diff --git a/src/occa/internal/core/memoryPool.hpp b/src/occa/internal/core/memoryPool.hpp new file mode 100644 index 000000000..7d2233fda --- /dev/null +++ b/src/occa/internal/core/memoryPool.hpp @@ -0,0 +1,60 @@ +#ifndef OCCA_INTERNAL_CORE_MEMORYPOOL_HEADER +#define OCCA_INTERNAL_CORE_MEMORYPOOL_HEADER + +#include +#include +#include + +namespace occa { + using experimental::memoryPool; + + class modeMemoryPool_t : public modeBuffer_t { + public: + struct compare { + bool operator()(const modeMemory_t* a, const modeMemory_t* b) const { + return (a->offset < b->offset) || + (a->offset == b->offset && a->size < b->size); + }; + }; + typedef std::set reservationSet; + + gc::ring_t memoryPoolRing; + + reservationSet reservations; + + udim_t alignment; + udim_t reserved; + + modeBuffer_t* buffer; + + bool verbose; + + modeMemoryPool_t(modeDevice_t *modeDevice_, + const occa::json &json_); + ~modeMemoryPool_t(); + + udim_t numReservations() const; + + modeMemory_t* reserve(const udim_t bytes); + + void resize(const udim_t bytes); + + void setAlignment(const udim_t newAlignment); + + void dontUseRefs(); + bool needsFree() const override; + void addMemoryPoolRef(memoryPool *memPool); + void removeMemoryPoolRef(memoryPool *memPool); + void addModeMemoryRef(modeMemory_t *mem) override; + void removeModeMemoryRef(modeMemory_t *mem) override; + + private: + virtual modeBuffer_t* makeBuffer()=0; + virtual void setPtr(modeMemory_t* mem, modeBuffer_t* buf, const dim_t offset)=0; + virtual void memcpy(modeBuffer_t* dst, const dim_t dstOffset, + modeBuffer_t* src, const dim_t srcOffset, + const udim_t bytes) = 0; + }; +} + +#endif diff --git a/src/occa/internal/modes/cuda/buffer.hpp b/src/occa/internal/modes/cuda/buffer.hpp index 8c1569425..925c565c9 100644 --- a/src/occa/internal/modes/cuda/buffer.hpp +++ b/src/occa/internal/modes/cuda/buffer.hpp @@ -9,24 +9,30 @@ namespace occa { namespace cuda { + class memory; + class memoryPool; + class buffer : public occa::modeBuffer_t { + friend class cuda::memory; + friend class cuda::memoryPool; + public: buffer(modeDevice_t *modeDevice_, udim_t size_, const occa::json &properties_ = occa::json()); ~buffer(); - void malloc(udim_t bytes); + void malloc(udim_t bytes) override; void wrapMemory(const void *ptr, const udim_t bytes); modeMemory_t* slice(const dim_t offset, - const udim_t bytes); + const udim_t bytes) override; - void detach(); + void detach() override; - public: + private: CUdeviceptr cuPtr; bool isUnified; bool useHostPtr; diff --git a/src/occa/internal/modes/cuda/device.cpp b/src/occa/internal/modes/cuda/device.cpp index 859c03652..47ac92567 100644 --- a/src/occa/internal/modes/cuda/device.cpp +++ b/src/occa/internal/modes/cuda/device.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -442,6 +443,10 @@ namespace occa { return new cuda::memory(buf, bytes, 0); } + modeMemoryPool_t* device::createMemoryPool(const occa::json &props) { + return new cuda::memoryPool(this, props); + } + udim_t device::memorySize() const { return cuda::getDeviceMemorySize(cuDevice); } diff --git a/src/occa/internal/modes/cuda/device.hpp b/src/occa/internal/modes/cuda/device.hpp index 9965bcaf7..060732cf9 100644 --- a/src/occa/internal/modes/cuda/device.hpp +++ b/src/occa/internal/modes/cuda/device.hpp @@ -97,6 +97,8 @@ namespace occa { const udim_t bytes, const occa::json &props); + modeMemoryPool_t* createMemoryPool(const occa::json &props); + virtual udim_t memorySize() const; //================================ }; diff --git a/src/occa/internal/modes/cuda/memory.cpp b/src/occa/internal/modes/cuda/memory.cpp index 07edb934a..df47d0dbe 100644 --- a/src/occa/internal/modes/cuda/memory.cpp +++ b/src/occa/internal/modes/cuda/memory.cpp @@ -5,10 +5,23 @@ namespace occa { namespace cuda { - memory::memory(modeBuffer_t *modeBuffer_, + memory::memory(buffer *b, udim_t size_, dim_t offset_) : - occa::modeMemory_t(modeBuffer_, size_, offset_) { - buffer *b = dynamic_cast(modeBuffer); + occa::modeMemory_t(b, size_, offset_) { + isUnified = b->isUnified; + useHostPtr = b->useHostPtr; + if (isUnified || useHostPtr) { + ptr = b->ptr + offset; + } + if (isUnified || !useHostPtr) { + cuPtr = b->cuPtr + offset; + } + } + + memory::memory(memoryPool *memPool, + udim_t size_, dim_t offset_) : + occa::modeMemory_t(memPool, size_, offset_) { + cuda::buffer* b = dynamic_cast(memPool->buffer); isUnified = b->isUnified; useHostPtr = b->useHostPtr; if (isUnified || useHostPtr) { diff --git a/src/occa/internal/modes/cuda/memory.hpp b/src/occa/internal/modes/cuda/memory.hpp index 177a686b6..3e7ecab40 100644 --- a/src/occa/internal/modes/cuda/memory.hpp +++ b/src/occa/internal/modes/cuda/memory.hpp @@ -5,6 +5,8 @@ #include #include +#include +#include namespace occa { namespace cuda { @@ -16,7 +18,9 @@ namespace occa { bool isUnified; bool useHostPtr; - memory(modeBuffer_t *modeBuffer_, + memory(buffer *b, + udim_t size_, dim_t offset_); + memory(memoryPool *memPool, udim_t size_, dim_t offset_); ~memory(); diff --git a/src/occa/internal/modes/cuda/memoryPool.cpp b/src/occa/internal/modes/cuda/memoryPool.cpp new file mode 100644 index 000000000..1eefd6e63 --- /dev/null +++ b/src/occa/internal/modes/cuda/memoryPool.cpp @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include + +namespace occa { + namespace cuda { + memoryPool::memoryPool(modeDevice_t *modeDevice_, + const occa::json &properties_) : + occa::modeMemoryPool_t(modeDevice_, properties_) {} + + CUstream& memoryPool::getCuStream() const { + return dynamic_cast(modeDevice)->getCuStream(); + } + + modeBuffer_t* memoryPool::makeBuffer() { + return new cuda::buffer(modeDevice, 0, properties); + } + + modeMemory_t* memoryPool::slice(const dim_t offset, + const udim_t bytes) { + return new cuda::memory(this, bytes, offset); + } + + void memoryPool::setPtr(modeMemory_t* mem, modeBuffer_t* buf, + const dim_t offset) { + + cuda::memory* m = dynamic_cast(mem); + cuda::buffer* b = dynamic_cast(buf); + + m->offset = offset; + if (b->isUnified || b->useHostPtr) { + m->ptr = b->ptr + offset; + } + if (b->isUnified || !b->useHostPtr) { + m->cuPtr = b->cuPtr + offset; + } + } + + void memoryPool::memcpy(modeBuffer_t* dst, const dim_t dstOffset, + modeBuffer_t* src, const dim_t srcOffset, + const udim_t bytes) { + + cuda::buffer* dstBuf = dynamic_cast(dst); + cuda::buffer* srcBuf = dynamic_cast(src); + + if (srcBuf->useHostPtr) { + ::memcpy(dstBuf->ptr + dstOffset, + srcBuf->ptr + srcOffset, + bytes); + } else { + OCCA_CUDA_ERROR("Memory: Async Copy From", + cuMemcpyDtoDAsync(dstBuf->cuPtr + dstOffset, + srcBuf->cuPtr + srcOffset, + bytes, + getCuStream())); + } + } + } +} diff --git a/src/occa/internal/modes/cuda/memoryPool.hpp b/src/occa/internal/modes/cuda/memoryPool.hpp new file mode 100644 index 000000000..08aa55583 --- /dev/null +++ b/src/occa/internal/modes/cuda/memoryPool.hpp @@ -0,0 +1,31 @@ +#include + +#ifndef OCCA_INTERNAL_MODES_CUDA_MEMORYPOOL_HEADER +#define OCCA_INTERNAL_MODES_CUDA_MEMORYPOOL_HEADER + +#include + +namespace occa { + namespace cuda { + class memoryPool : public occa::modeMemoryPool_t { + public: + memoryPool(modeDevice_t *modeDevice_, + const occa::json &properties_ = occa::json()); + + private: + CUstream& getCuStream() const; + + modeBuffer_t* makeBuffer() override; + + modeMemory_t* slice(const dim_t offset, const udim_t bytes) override; + + void setPtr(modeMemory_t* mem, modeBuffer_t* buf, const dim_t offset) override; + + void memcpy(modeBuffer_t* dst, const dim_t dstOffset, + modeBuffer_t* src, const dim_t srcOffset, + const udim_t bytes) override; + }; + } +} + +#endif diff --git a/src/occa/internal/modes/dpcpp/buffer.hpp b/src/occa/internal/modes/dpcpp/buffer.hpp index e21a222d1..d40e4058d 100644 --- a/src/occa/internal/modes/dpcpp/buffer.hpp +++ b/src/occa/internal/modes/dpcpp/buffer.hpp @@ -14,15 +14,15 @@ namespace occa { const occa::json &properties_ = occa::json()); ~buffer(); - void malloc(udim_t bytes); + void malloc(udim_t bytes) override; void wrapMemory(const void *ptr, const udim_t bytes); modeMemory_t* slice(const dim_t offset, - const udim_t bytes); + const udim_t bytes) override; - void detach(); + void detach() override; }; } // namespace dpcpp } // namespace occa diff --git a/src/occa/internal/modes/dpcpp/device.cpp b/src/occa/internal/modes/dpcpp/device.cpp index d2a1a1377..2107b4ae5 100644 --- a/src/occa/internal/modes/dpcpp/device.cpp +++ b/src/occa/internal/modes/dpcpp/device.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -365,6 +366,10 @@ namespace occa return new dpcpp::memory(buf, bytes, 0); } + modeMemoryPool_t* device::createMemoryPool(const occa::json &props) { + return new dpcpp::memoryPool(this, props); + } + udim_t device::memorySize() const { uint64_t global_mem_size{dpcppDevice.get_info<::sycl::info::device::global_mem_size>()}; diff --git a/src/occa/internal/modes/dpcpp/device.hpp b/src/occa/internal/modes/dpcpp/device.hpp index b9d676acb..3d714c4a4 100644 --- a/src/occa/internal/modes/dpcpp/device.hpp +++ b/src/occa/internal/modes/dpcpp/device.hpp @@ -90,6 +90,8 @@ namespace occa const udim_t bytes, const occa::json &props) override; + virtual modeMemoryPool_t* createMemoryPool(const occa::json &props) override; + virtual udim_t memorySize() const override; //================================ }; diff --git a/src/occa/internal/modes/dpcpp/memory.cpp b/src/occa/internal/modes/dpcpp/memory.cpp index 272058580..2d7e93696 100644 --- a/src/occa/internal/modes/dpcpp/memory.cpp +++ b/src/occa/internal/modes/dpcpp/memory.cpp @@ -10,17 +10,21 @@ namespace occa { namespace dpcpp { - memory::memory(modeBuffer_t *modeBuffer_, + memory::memory(buffer *b, udim_t size_, dim_t offset_) : - occa::modeMemory_t(modeBuffer_, size_, offset_) { - buffer *b = dynamic_cast(modeBuffer); + occa::modeMemory_t(b, size_, offset_) { ptr = b->ptr + offset; } + memory::memory(memoryPool *memPool, + udim_t size_, dim_t offset_) : + occa::modeMemory_t(memPool, size_, offset_) { + ptr = memPool->buffer->ptr + offset; + } + memory::~memory() { ptr = nullptr; - size = 0; } void *memory::getKernelArgPtr() const diff --git a/src/occa/internal/modes/dpcpp/memory.hpp b/src/occa/internal/modes/dpcpp/memory.hpp index 70ed3b992..a1a082f8e 100644 --- a/src/occa/internal/modes/dpcpp/memory.hpp +++ b/src/occa/internal/modes/dpcpp/memory.hpp @@ -3,6 +3,8 @@ #include #include +#include +#include namespace occa { @@ -13,7 +15,9 @@ namespace occa class memory : public occa::modeMemory_t { public: - memory(modeBuffer_t *modeBuffer_, + memory(buffer *b, + udim_t size_, dim_t offset_); + memory(memoryPool *memPool, udim_t size_, dim_t offset_); virtual ~memory(); diff --git a/src/occa/internal/modes/dpcpp/memoryPool.cpp b/src/occa/internal/modes/dpcpp/memoryPool.cpp new file mode 100644 index 000000000..80ab4e11b --- /dev/null +++ b/src/occa/internal/modes/dpcpp/memoryPool.cpp @@ -0,0 +1,40 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace occa { + namespace dpcpp { + memoryPool::memoryPool(modeDevice_t *modeDevice_, + const occa::json &properties_) : + occa::modeMemoryPool_t(modeDevice_, properties_) {} + + modeBuffer_t* memoryPool::makeBuffer() { + return new dpcpp::buffer(modeDevice, 0, properties); + } + + modeMemory_t* memoryPool::slice(const dim_t offset, + const udim_t bytes) { + return new dpcpp::memory(this, bytes, offset); + } + + void memoryPool::setPtr(modeMemory_t* mem, modeBuffer_t* buf, + const dim_t offset) { + mem->offset = offset; + mem->ptr = buf->ptr + offset; + } + + void memoryPool::memcpy(modeBuffer_t* dst, const dim_t dstOffset, + modeBuffer_t* src, const dim_t srcOffset, + const udim_t bytes) { + + occa::dpcpp::stream& q = getDpcppStream(modeDevice->currentStream); + occa::dpcpp::streamTag e = q.memcpy(dst->ptr + dstOffset, + src->ptr + srcOffset, + bytes); + } + } +} diff --git a/src/occa/internal/modes/dpcpp/memoryPool.hpp b/src/occa/internal/modes/dpcpp/memoryPool.hpp new file mode 100644 index 000000000..dedd3b49d --- /dev/null +++ b/src/occa/internal/modes/dpcpp/memoryPool.hpp @@ -0,0 +1,29 @@ +#include + +#ifndef OCCA_INTERNAL_MODES_DPCPP_MEMORYPOOL_HEADER +#define OCCA_INTERNAL_MODES_DPCPP_MEMORYPOOL_HEADER + +#include + +namespace occa { + namespace dpcpp { + class memoryPool : public occa::modeMemoryPool_t { + public: + memoryPool(modeDevice_t *modeDevice_, + const occa::json &properties_ = occa::json()); + + private: + modeBuffer_t* makeBuffer() override; + + modeMemory_t* slice(const dim_t offset, const udim_t bytes) override; + + void setPtr(modeMemory_t* mem, modeBuffer_t* buf, const dim_t offset) override; + + void memcpy(modeBuffer_t* dst, const dim_t dstOffset, + modeBuffer_t* src, const dim_t srcOffset, + const udim_t bytes) override; + }; + } +} + +#endif diff --git a/src/occa/internal/modes/hip/buffer.cpp b/src/occa/internal/modes/hip/buffer.cpp index b76a9c0be..f48c4a94c 100644 --- a/src/occa/internal/modes/hip/buffer.cpp +++ b/src/occa/internal/modes/hip/buffer.cpp @@ -19,7 +19,7 @@ namespace occa { hipHostFree(ptr)); } else if (hipPtr) { OCCA_HIP_ERROR("Device: free()", - hipFree((void*) hipPtr)); + hipFree((void*) hipPtr)); } } ptr = nullptr; diff --git a/src/occa/internal/modes/hip/buffer.hpp b/src/occa/internal/modes/hip/buffer.hpp index c14617f83..8c7af47c9 100644 --- a/src/occa/internal/modes/hip/buffer.hpp +++ b/src/occa/internal/modes/hip/buffer.hpp @@ -9,24 +9,30 @@ namespace occa { namespace hip { + class memory; + class memoryPool; + class buffer : public occa::modeBuffer_t { + friend class hip::memory; + friend class hip::memoryPool; + public: buffer(modeDevice_t *modeDevice_, udim_t size_, const occa::json &properties_ = occa::json()); ~buffer(); - void malloc(udim_t bytes); + void malloc(udim_t bytes) override; void wrapMemory(const void *ptr, const udim_t bytes); modeMemory_t* slice(const dim_t offset, - const udim_t bytes); + const udim_t bytes) override; - void detach(); + void detach() override; - public: + private: hipDeviceptr_t hipPtr; bool useHostPtr; }; diff --git a/src/occa/internal/modes/hip/device.cpp b/src/occa/internal/modes/hip/device.cpp index 1310e64ec..8a690a044 100644 --- a/src/occa/internal/modes/hip/device.cpp +++ b/src/occa/internal/modes/hip/device.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -425,6 +426,10 @@ namespace occa { return new hip::memory(buf, bytes, 0); } + modeMemoryPool_t* device::createMemoryPool(const occa::json &props) { + return new hip::memoryPool(this, props); + } + udim_t device::memorySize() const { return hip::getDeviceMemorySize(hipDevice); } diff --git a/src/occa/internal/modes/hip/device.hpp b/src/occa/internal/modes/hip/device.hpp index ed3df60ce..5fa1de719 100644 --- a/src/occa/internal/modes/hip/device.hpp +++ b/src/occa/internal/modes/hip/device.hpp @@ -87,6 +87,8 @@ namespace occa { const udim_t bytes, const occa::json &props); + modeMemoryPool_t* createMemoryPool(const occa::json &props); + virtual udim_t memorySize() const; //================================ }; diff --git a/src/occa/internal/modes/hip/memory.cpp b/src/occa/internal/modes/hip/memory.cpp index fbb2feb75..ba23ec01d 100644 --- a/src/occa/internal/modes/hip/memory.cpp +++ b/src/occa/internal/modes/hip/memory.cpp @@ -9,10 +9,21 @@ namespace occa { return (hipDeviceptr_t) (((char*) hipPtr) + offset); } - memory::memory(modeBuffer_t *modeBuffer_, + memory::memory(buffer *b, udim_t size_, dim_t offset_) : - occa::modeMemory_t(modeBuffer_, size_, offset_) { - buffer *b = dynamic_cast(modeBuffer); + occa::modeMemory_t(b, size_, offset_) { + useHostPtr = b->useHostPtr; + if (useHostPtr) { + ptr = b->ptr + offset; + } else { + hipPtr = addHipPtrOffset(b->hipPtr, offset); + } + } + + memory::memory(memoryPool *memPool, + udim_t size_, dim_t offset_) : + occa::modeMemory_t(memPool, size_, offset_) { + hip::buffer* b = dynamic_cast(memPool->buffer); useHostPtr = b->useHostPtr; if (useHostPtr) { ptr = b->ptr + offset; diff --git a/src/occa/internal/modes/hip/memory.hpp b/src/occa/internal/modes/hip/memory.hpp index fa67a4efa..c6bc03850 100644 --- a/src/occa/internal/modes/hip/memory.hpp +++ b/src/occa/internal/modes/hip/memory.hpp @@ -3,6 +3,8 @@ #include #include +#include +#include namespace occa { namespace hip { @@ -13,7 +15,9 @@ namespace occa { hipDeviceptr_t hipPtr; bool useHostPtr; - memory(modeBuffer_t *modeBuffer_, + memory(buffer *b, + udim_t size_, dim_t offset_); + memory(memoryPool *memPool, udim_t size_, dim_t offset_); ~memory(); diff --git a/src/occa/internal/modes/hip/memoryPool.cpp b/src/occa/internal/modes/hip/memoryPool.cpp new file mode 100644 index 000000000..de00861fc --- /dev/null +++ b/src/occa/internal/modes/hip/memoryPool.cpp @@ -0,0 +1,64 @@ +#include +#include +#include +#include +#include + +namespace occa { + namespace hip { + inline hipDeviceptr_t addHipPtrOffset(hipDeviceptr_t hipPtr, const udim_t offset) { + return (hipDeviceptr_t) (((char*) hipPtr) + offset); + } + + memoryPool::memoryPool(modeDevice_t *modeDevice_, + const occa::json &properties_) : + occa::modeMemoryPool_t(modeDevice_, properties_) {} + + hipStream_t& memoryPool::getHipStream() const { + return dynamic_cast(modeDevice)->getHipStream(); + } + + modeBuffer_t* memoryPool::makeBuffer() { + return new hip::buffer(modeDevice, 0, properties); + } + + modeMemory_t* memoryPool::slice(const dim_t offset, + const udim_t bytes) { + return new hip::memory(this, bytes, offset); + } + + void memoryPool::setPtr(modeMemory_t* mem, modeBuffer_t* buf, + const dim_t offset) { + + hip::memory* m = dynamic_cast(mem); + hip::buffer* b = dynamic_cast(buf); + + m->offset = offset; + if (b->useHostPtr) { + m->ptr = b->ptr + offset; + } else { + m->hipPtr = addHipPtrOffset(b->hipPtr, offset); + } + } + + void memoryPool::memcpy(modeBuffer_t* dst, const dim_t dstOffset, + modeBuffer_t* src, const dim_t srcOffset, + const udim_t bytes) { + + hip::buffer* dstBuf = dynamic_cast(dst); + hip::buffer* srcBuf = dynamic_cast(src); + + if (srcBuf->useHostPtr) { + ::memcpy(dstBuf->ptr + dstOffset, + srcBuf->ptr + srcOffset, + bytes); + } else { + OCCA_HIP_ERROR("Memory: Async Copy From", + hipMemcpyDtoDAsync(addHipPtrOffset(dstBuf->hipPtr, dstOffset), + addHipPtrOffset(srcBuf->hipPtr, srcOffset), + bytes, + getHipStream())); + } + } + } +} diff --git a/src/occa/internal/modes/hip/memoryPool.hpp b/src/occa/internal/modes/hip/memoryPool.hpp new file mode 100644 index 000000000..a0f35c4be --- /dev/null +++ b/src/occa/internal/modes/hip/memoryPool.hpp @@ -0,0 +1,31 @@ +#include + +#ifndef OCCA_INTERNAL_MODES_HIP_MEMORYPOOL_HEADER +#define OCCA_INTERNAL_MODES_HIP_MEMORYPOOL_HEADER + +#include + +namespace occa { + namespace hip { + class memoryPool : public occa::modeMemoryPool_t { + public: + memoryPool(modeDevice_t *modeDevice_, + const occa::json &properties_ = occa::json()); + + private: + hipStream_t& getHipStream() const; + + modeBuffer_t* makeBuffer() override; + + modeMemory_t* slice(const dim_t offset, const udim_t bytes) override; + + void setPtr(modeMemory_t* mem, modeBuffer_t* buf, const dim_t offset) override; + + void memcpy(modeBuffer_t* dst, const dim_t dstOffset, + modeBuffer_t* src, const dim_t srcOffset, + const udim_t bytes) override; + }; + } +} + +#endif diff --git a/src/occa/internal/modes/metal/buffer.hpp b/src/occa/internal/modes/metal/buffer.hpp index 6b8e471e3..c381c118f 100644 --- a/src/occa/internal/modes/metal/buffer.hpp +++ b/src/occa/internal/modes/metal/buffer.hpp @@ -4,16 +4,15 @@ #include #include #include -#include namespace occa { namespace metal { - class buffer : public occa::modeBuffer_t { - friend class metal::memory; + class memory; + class memoryPool; - private: - api::metal::buffer_t metalBuffer; - udim_t bufferOffset; + class buffer : public occa::modeBuffer_t { + friend class metal::memory; + friend class metal::memoryPool; public: buffer(modeDevice_t *modeDevice_, @@ -21,17 +20,20 @@ namespace occa { const occa::json &properties_ = occa::json()); ~buffer(); - void malloc(udim_t bytes); + void malloc(udim_t bytes) override; void wrapMemory(const void *ptr, const udim_t bytes); modeMemory_t* slice(const dim_t offset, - const udim_t bytes); + const udim_t bytes) override; void* getPtr(); - void detach(); + void detach() override; + + private: + api::metal::buffer_t metalBuffer; }; } } diff --git a/src/occa/internal/modes/metal/device.cpp b/src/occa/internal/modes/metal/device.cpp index 4f311587a..c7cddd106 100644 --- a/src/occa/internal/modes/metal/device.cpp +++ b/src/occa/internal/modes/metal/device.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -303,6 +304,10 @@ namespace occa { return new metal::memory(buf, bytes, 0); } + modeMemoryPool_t* device::createMemoryPool(const occa::json &props) { + return new metal::memoryPool(this, props); + } + udim_t device::memorySize() const { return metalDevice.getMemorySize(); } diff --git a/src/occa/internal/modes/metal/device.hpp b/src/occa/internal/modes/metal/device.hpp index 92938b6c6..d105ed9e9 100644 --- a/src/occa/internal/modes/metal/device.hpp +++ b/src/occa/internal/modes/metal/device.hpp @@ -80,6 +80,8 @@ namespace occa { const udim_t bytes, const occa::json &props); + modeMemoryPool_t* createMemoryPool(const occa::json &props); + virtual udim_t memorySize() const; //================================ }; diff --git a/src/occa/internal/modes/metal/memory.cpp b/src/occa/internal/modes/metal/memory.cpp index 49313e6c0..cb7512210 100644 --- a/src/occa/internal/modes/metal/memory.cpp +++ b/src/occa/internal/modes/metal/memory.cpp @@ -5,11 +5,19 @@ namespace occa { namespace metal { - memory::memory(modeBuffer_t *modeBuffer_, + memory::memory(buffer *b, udim_t size_, dim_t offset_) : - occa::modeMemory_t(modeBuffer_, size_, offset_), + occa::modeMemory_t(b, size_, offset_), bufferOffset(offset) { - buffer *b = dynamic_cast(modeBuffer); + metalBuffer = b->metalBuffer; + ptr = (char*) metalBuffer.getPtr(); + } + + memory::memory(memoryPool *memPool, + udim_t size_, dim_t offset_) : + occa::modeMemory_t(memPool, size_, offset_), + bufferOffset(offset) { + metal::buffer* b = dynamic_cast(memPool->buffer); metalBuffer = b->metalBuffer; ptr = (char*) metalBuffer.getPtr(); } @@ -17,7 +25,6 @@ namespace occa { memory::~memory() { metalBuffer = NULL; bufferOffset = 0; - size = 0; } void* memory::getKernelArgPtr() const { @@ -46,7 +53,7 @@ namespace occa { ((metal::device*) getModeDevice())->metalCommandQueue ); metalCommandQueue.memcpy(metalBuffer, - offset_, + bufferOffset+offset_, src, bytes, async); @@ -63,9 +70,9 @@ namespace occa { ((metal::device*) getModeDevice())->metalCommandQueue ); metalCommandQueue.memcpy(metalBuffer, - destOffset, + bufferOffset+destOffset, ((const metal::memory*) src)->metalBuffer, - srcOffset, + ((const metal::memory*) src)->bufferOffset + srcOffset, bytes, async); } @@ -82,7 +89,7 @@ namespace occa { ); metalCommandQueue.memcpy(dest, metalBuffer, - offset_, + bufferOffset + offset_, bytes, async); } diff --git a/src/occa/internal/modes/metal/memory.hpp b/src/occa/internal/modes/metal/memory.hpp index e37d7d64d..94a310899 100644 --- a/src/occa/internal/modes/metal/memory.hpp +++ b/src/occa/internal/modes/metal/memory.hpp @@ -2,17 +2,21 @@ #define OCCA_INTERNAL_MODES_METAL_MEMORY_HEADER #include +#include +#include #include namespace occa { namespace metal { class memory : public occa::modeMemory_t { - private: + public: api::metal::buffer_t metalBuffer; udim_t bufferOffset; public: - memory(modeBuffer_t *modeBuffer_, + memory(buffer *b, + udim_t size_, dim_t offset_); + memory(memoryPool *memPool, udim_t size_, dim_t offset_); ~memory(); diff --git a/src/occa/internal/modes/metal/memoryPool.cpp b/src/occa/internal/modes/metal/memoryPool.cpp new file mode 100644 index 000000000..11eb10113 --- /dev/null +++ b/src/occa/internal/modes/metal/memoryPool.cpp @@ -0,0 +1,52 @@ +#include +#include +#include +#include + +namespace occa { + namespace metal { + memoryPool::memoryPool(modeDevice_t *modeDevice_, + const occa::json &properties_) : + occa::modeMemoryPool_t(modeDevice_, properties_) {} + + modeBuffer_t* memoryPool::makeBuffer() { + return new metal::buffer(modeDevice, 0, properties); + } + + modeMemory_t* memoryPool::slice(const dim_t offset, + const udim_t bytes) { + return new metal::memory(this, bytes, offset); + } + + void memoryPool::setPtr(modeMemory_t* mem, modeBuffer_t* buf, + const dim_t offset) { + + metal::memory* m = dynamic_cast(mem); + metal::buffer* b = dynamic_cast(buf); + + m->offset = offset; + m->metalBuffer = b->metalBuffer; + m->ptr = (char*) b->metalBuffer.getPtr(); + } + + void memoryPool::memcpy(modeBuffer_t* dst, const dim_t dstOffset, + modeBuffer_t* src, const dim_t srcOffset, + const udim_t bytes) { + + metal::buffer* dstBuf = dynamic_cast(dst); + metal::buffer* srcBuf = dynamic_cast(src); + + const bool async = true; + + api::metal::commandQueue_t &metalCommandQueue = ( + ((metal::device*) modeDevice)->metalCommandQueue + ); + metalCommandQueue.memcpy(dstBuf->metalBuffer, + dstOffset, + srcBuf->metalBuffer, + srcOffset, + bytes, + async); + } + } +} diff --git a/src/occa/internal/modes/metal/memoryPool.hpp b/src/occa/internal/modes/metal/memoryPool.hpp new file mode 100644 index 000000000..317e2a764 --- /dev/null +++ b/src/occa/internal/modes/metal/memoryPool.hpp @@ -0,0 +1,29 @@ +#include + +#ifndef OCCA_INTERNAL_MODES_METAL_MEMORYPOOL_HEADER +#define OCCA_INTERNAL_MODES_METAL_MEMORYPOOL_HEADER + +#include + +namespace occa { + namespace metal { + class memoryPool : public occa::modeMemoryPool_t { + public: + memoryPool(modeDevice_t *modeDevice_, + const occa::json &properties_ = occa::json()); + + private: + modeBuffer_t* makeBuffer() override; + + modeMemory_t* slice(const dim_t offset, const udim_t bytes) override; + + void setPtr(modeMemory_t* mem, modeBuffer_t* buf, const dim_t offset) override; + + void memcpy(modeBuffer_t* dst, const dim_t dstOffset, + modeBuffer_t* src, const dim_t srcOffset, + const udim_t bytes) override; + }; + } +} + +#endif diff --git a/src/occa/internal/modes/opencl/buffer.hpp b/src/occa/internal/modes/opencl/buffer.hpp index a4ba13316..2ad34c382 100644 --- a/src/occa/internal/modes/opencl/buffer.hpp +++ b/src/occa/internal/modes/opencl/buffer.hpp @@ -3,17 +3,16 @@ #include #include -#include #include namespace occa { namespace opencl { + class memory; + class memoryPool; + class buffer : public occa::modeBuffer_t { friend class opencl::memory; - - private: - cl_mem clMem; - bool useHostPtr; + friend class opencl::memoryPool; public: buffer(modeDevice_t *modeDevice_, @@ -21,15 +20,19 @@ namespace occa { const occa::json &properties_ = occa::json()); ~buffer(); - void malloc(udim_t bytes); + void malloc(udim_t bytes) override; void wrapMemory(const void *ptr, const udim_t bytes); modeMemory_t* slice(const dim_t offset, - const udim_t bytes); + const udim_t bytes) override; - void detach(); + void detach() override; + + private: + cl_mem clMem; + bool useHostPtr; }; } } diff --git a/src/occa/internal/modes/opencl/device.cpp b/src/occa/internal/modes/opencl/device.cpp index bf1e463f5..d4a231e16 100644 --- a/src/occa/internal/modes/opencl/device.cpp +++ b/src/occa/internal/modes/opencl/device.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -356,6 +357,10 @@ namespace occa { return new opencl::memory(buf, bytes, 0); } + modeMemoryPool_t* device::createMemoryPool(const occa::json &props) { + return new opencl::memoryPool(this, props); + } + udim_t device::memorySize() const { return opencl::deviceGlobalMemSize(clDevice); } diff --git a/src/occa/internal/modes/opencl/device.hpp b/src/occa/internal/modes/opencl/device.hpp index 8b4937bec..8e004894e 100644 --- a/src/occa/internal/modes/opencl/device.hpp +++ b/src/occa/internal/modes/opencl/device.hpp @@ -88,6 +88,8 @@ namespace occa { const udim_t bytes, const occa::json &props); + modeMemoryPool_t* createMemoryPool(const occa::json &props); + virtual udim_t memorySize() const; //================================ }; diff --git a/src/occa/internal/modes/opencl/memory.cpp b/src/occa/internal/modes/opencl/memory.cpp index 79f738a64..df77b5802 100644 --- a/src/occa/internal/modes/opencl/memory.cpp +++ b/src/occa/internal/modes/opencl/memory.cpp @@ -6,11 +6,37 @@ namespace occa { namespace opencl { - memory::memory(modeBuffer_t *modeBuffer_, + memory::memory(buffer *b, udim_t size_, dim_t offset_) : - occa::modeMemory_t(modeBuffer_, size_, offset_), + occa::modeMemory_t(b, size_, offset_), useHostPtr(false) { - buffer *b = dynamic_cast(modeBuffer); + useHostPtr = b->useHostPtr; + + if (offset==0 && size==b->size){ + clMem = b->clMem; + } else { + cl_buffer_region info; + info.origin = offset; + info.size = size; + + cl_int error; + clMem = clCreateSubBuffer(b->clMem, + CL_MEM_READ_WRITE, + CL_BUFFER_CREATE_TYPE_REGION, + &info, + &error); + OCCA_OPENCL_ERROR("Device: clCreateSubBuffer", error); + } + if (useHostPtr) { + ptr = b->ptr + offset; + } + } + + memory::memory(memoryPool *memPool, + udim_t size_, dim_t offset_) : + occa::modeMemory_t(memPool, size_, offset_), + useHostPtr(false) { + opencl::buffer* b = dynamic_cast(memPool->buffer); useHostPtr = b->useHostPtr; if (offset==0 && size==b->size){ @@ -34,7 +60,6 @@ namespace occa { } memory::~memory() { - size = 0; useHostPtr = false; } diff --git a/src/occa/internal/modes/opencl/memory.hpp b/src/occa/internal/modes/opencl/memory.hpp index 28ba7a339..101b2085f 100644 --- a/src/occa/internal/modes/opencl/memory.hpp +++ b/src/occa/internal/modes/opencl/memory.hpp @@ -3,6 +3,8 @@ #include #include +#include +#include namespace occa { namespace opencl { @@ -11,12 +13,14 @@ namespace occa { class memory : public occa::modeMemory_t { friend cl_mem getCLMemory(occa::memory memory); - private: + public: cl_mem clMem; bool useHostPtr; public: - memory(modeBuffer_t *modeBuffer_, + memory(buffer *b, + udim_t size_, dim_t offset_); + memory(memoryPool *memPool, udim_t size_, dim_t offset_); ~memory(); diff --git a/src/occa/internal/modes/opencl/memoryPool.cpp b/src/occa/internal/modes/opencl/memoryPool.cpp new file mode 100644 index 000000000..2cc5f9251 --- /dev/null +++ b/src/occa/internal/modes/opencl/memoryPool.cpp @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include + +namespace occa { + namespace opencl { + memoryPool::memoryPool(modeDevice_t *modeDevice_, + const occa::json &properties_) : + occa::modeMemoryPool_t(modeDevice_, properties_) {} + + cl_command_queue& memoryPool::getCommandQueue() const { + return dynamic_cast(modeDevice)->getCommandQueue(); + } + + modeBuffer_t* memoryPool::makeBuffer() { + return new opencl::buffer(modeDevice, 0, properties); + } + + modeMemory_t* memoryPool::slice(const dim_t offset, + const udim_t bytes) { + return new opencl::memory(this, bytes, offset); + } + + void memoryPool::setPtr(modeMemory_t* mem, modeBuffer_t* buf, + const dim_t offset) { + + opencl::memory* m = dynamic_cast(mem); + opencl::buffer* b = dynamic_cast(buf); + + if (offset==0 && m->size==b->size){ + m->clMem = b->clMem; + } else { + cl_buffer_region info; + info.origin = offset; + info.size = size; + + cl_int error; + m->clMem = clCreateSubBuffer(b->clMem, + CL_MEM_READ_WRITE, + CL_BUFFER_CREATE_TYPE_REGION, + &info, + &error); + OCCA_OPENCL_ERROR("Device: clCreateSubBuffer", error); + } + if (m->useHostPtr) { + m->ptr = b->ptr + offset; + } + } + + void memoryPool::memcpy(modeBuffer_t* dst, const dim_t dstOffset, + modeBuffer_t* src, const dim_t srcOffset, + const udim_t bytes) { + + opencl::buffer* dstBuf = dynamic_cast(dst); + opencl::buffer* srcBuf = dynamic_cast(src); + + const bool async = true; + + OCCA_OPENCL_ERROR("Memory: " << (async ? "Async " : "") << "Copy From", + clEnqueueCopyBuffer(getCommandQueue(), + srcBuf->clMem, + dstBuf->clMem, + srcOffset, dstOffset, + bytes, + 0, NULL, NULL)); + } + } +} diff --git a/src/occa/internal/modes/opencl/memoryPool.hpp b/src/occa/internal/modes/opencl/memoryPool.hpp new file mode 100644 index 000000000..6bb560d5b --- /dev/null +++ b/src/occa/internal/modes/opencl/memoryPool.hpp @@ -0,0 +1,31 @@ +#include + +#ifndef OCCA_INTERNAL_MODES_OPENCL_MEMORYPOOL_HEADER +#define OCCA_INTERNAL_MODES_OPENCL_MEMORYPOOL_HEADER + +#include + +namespace occa { + namespace opencl { + class memoryPool : public occa::modeMemoryPool_t { + public: + memoryPool(modeDevice_t *modeDevice_, + const occa::json &properties_ = occa::json()); + + private: + cl_command_queue& getCommandQueue() const; + + modeBuffer_t* makeBuffer() override; + + modeMemory_t* slice(const dim_t offset, const udim_t bytes) override; + + void setPtr(modeMemory_t* mem, modeBuffer_t* buf, const dim_t offset) override; + + void memcpy(modeBuffer_t* dst, const dim_t dstOffset, + modeBuffer_t* src, const dim_t srcOffset, + const udim_t bytes) override; + }; + } +} + +#endif diff --git a/src/occa/internal/modes/opencl/polyfill.hpp b/src/occa/internal/modes/opencl/polyfill.hpp index faaa18342..b1a5378f8 100644 --- a/src/occa/internal/modes/opencl/polyfill.hpp +++ b/src/occa/internal/modes/opencl/polyfill.hpp @@ -87,6 +87,7 @@ namespace occa { static cl_device_type CL_DEVICE_TYPE_CPU = 1; static cl_device_type CL_DEVICE_TYPE_GPU = 2; static cl_device_type CL_DEVICE_TYPE_ALL = 3; + static cl_device_type CL_DEVICE_TYPE_DEFAULT = 4; static cl_kernel_work_group_info CL_KERNEL_WORK_GROUP_SIZE = 0; diff --git a/src/occa/internal/modes/opencl/utils.cpp b/src/occa/internal/modes/opencl/utils.cpp index 8fd4e650b..aa9f1f9c3 100644 --- a/src/occa/internal/modes/opencl/utils.cpp +++ b/src/occa/internal/modes/opencl/utils.cpp @@ -123,7 +123,7 @@ namespace occa { } cl_device_type deviceType(info::device_type type) { - cl_device_type dtype = CL_DEVICE_TYPE_ALL; + cl_device_type dtype = CL_DEVICE_TYPE_DEFAULT; switch (type) { case info::device_type::cpu: dtype = CL_DEVICE_TYPE_CPU; diff --git a/src/occa/internal/modes/serial/buffer.hpp b/src/occa/internal/modes/serial/buffer.hpp index 566ee875e..a957fac6b 100644 --- a/src/occa/internal/modes/serial/buffer.hpp +++ b/src/occa/internal/modes/serial/buffer.hpp @@ -14,15 +14,15 @@ namespace occa { const occa::json &properties_ = occa::json()); ~buffer(); - void malloc(udim_t bytes); + void malloc(udim_t bytes) override; void wrapMemory(const void *ptr, const udim_t bytes); modeMemory_t* slice(const dim_t offset, - const udim_t bytes); + const udim_t bytes) override; - void detach(); + void detach() override; }; } } diff --git a/src/occa/internal/modes/serial/device.cpp b/src/occa/internal/modes/serial/device.cpp index 4918265ca..407ef5b60 100644 --- a/src/occa/internal/modes/serial/device.cpp +++ b/src/occa/internal/modes/serial/device.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -459,6 +460,10 @@ namespace occa { return new serial::memory(buf, bytes, 0); } + modeMemoryPool_t* device::createMemoryPool(const occa::json &props) { + return new serial::memoryPool(this, props); + } + udim_t device::memorySize() const { return sys::SystemInfo::load().memory.total; } diff --git a/src/occa/internal/modes/serial/device.hpp b/src/occa/internal/modes/serial/device.hpp index 8f7e8f026..1f39e3f74 100644 --- a/src/occa/internal/modes/serial/device.hpp +++ b/src/occa/internal/modes/serial/device.hpp @@ -69,6 +69,8 @@ namespace occa { const udim_t bytes, const occa::json &props); + modeMemoryPool_t* createMemoryPool(const occa::json &props); + virtual udim_t memorySize() const; //================================ }; diff --git a/src/occa/internal/modes/serial/memory.cpp b/src/occa/internal/modes/serial/memory.cpp index d3e3a7f36..b3a890908 100644 --- a/src/occa/internal/modes/serial/memory.cpp +++ b/src/occa/internal/modes/serial/memory.cpp @@ -6,13 +6,18 @@ namespace occa { namespace serial { - memory::memory(modeBuffer_t *modeBuffer_, + memory::memory(buffer *b, udim_t size_, dim_t offset_) : - occa::modeMemory_t(modeBuffer_, size_, offset_) { - buffer *b = dynamic_cast(modeBuffer); + occa::modeMemory_t(b, size_, offset_) { ptr = b->ptr + offset; } + memory::memory(memoryPool *memPool, + udim_t size_, dim_t offset_) : + occa::modeMemory_t(memPool, size_, offset_) { + ptr = memPool->buffer->ptr + offset; + } + memory::~memory() {} void* memory::getKernelArgPtr() const { diff --git a/src/occa/internal/modes/serial/memory.hpp b/src/occa/internal/modes/serial/memory.hpp index d4cedfe56..3879720fd 100644 --- a/src/occa/internal/modes/serial/memory.hpp +++ b/src/occa/internal/modes/serial/memory.hpp @@ -3,12 +3,16 @@ #include #include +#include +#include namespace occa { namespace serial { class memory : public occa::modeMemory_t { public: - memory(modeBuffer_t *modeBuffer_, + memory(buffer *b, + udim_t size_, dim_t offset_); + memory(memoryPool *memPool, udim_t size_, dim_t offset_); ~memory(); diff --git a/src/occa/internal/modes/serial/memoryPool.cpp b/src/occa/internal/modes/serial/memoryPool.cpp new file mode 100644 index 000000000..c4e142de4 --- /dev/null +++ b/src/occa/internal/modes/serial/memoryPool.cpp @@ -0,0 +1,37 @@ +#include +#include +#include +#include +#include +#include + +namespace occa { + namespace serial { + memoryPool::memoryPool(modeDevice_t *modeDevice_, + const occa::json &properties_) : + occa::modeMemoryPool_t(modeDevice_, properties_) {} + + modeBuffer_t* memoryPool::makeBuffer() { + return new serial::buffer(modeDevice, 0, properties); + } + + modeMemory_t* memoryPool::slice(const dim_t offset, + const udim_t bytes) { + return new serial::memory(this, bytes, offset); + } + + void memoryPool::setPtr(modeMemory_t* mem, modeBuffer_t* buf, + const dim_t offset) { + mem->offset = offset; + mem->ptr = buf->ptr + offset; + } + + void memoryPool::memcpy(modeBuffer_t* dst, const dim_t dstOffset, + modeBuffer_t* src, const dim_t srcOffset, + const udim_t bytes) { + ::memcpy(dst->ptr + dstOffset, + src->ptr + srcOffset, + bytes); + } + } +} diff --git a/src/occa/internal/modes/serial/memoryPool.hpp b/src/occa/internal/modes/serial/memoryPool.hpp new file mode 100644 index 000000000..3330119ff --- /dev/null +++ b/src/occa/internal/modes/serial/memoryPool.hpp @@ -0,0 +1,29 @@ +#include + +#ifndef OCCA_INTERNAL_MODES_SERIAL_MEMORYPOOL_HEADER +#define OCCA_INTERNAL_MODES_SERIAL_MEMORYPOOL_HEADER + +#include + +namespace occa { + namespace serial { + class memoryPool : public occa::modeMemoryPool_t { + public: + memoryPool(modeDevice_t *modeDevice_, + const occa::json &properties_ = occa::json()); + + private: + modeBuffer_t* makeBuffer() override; + + modeMemory_t* slice(const dim_t offset, const udim_t bytes) override; + + void setPtr(modeMemory_t* mem, modeBuffer_t* buf, const dim_t offset) override; + + void memcpy(modeBuffer_t* dst, const dim_t dstOffset, + modeBuffer_t* src, const dim_t srcOffset, + const udim_t bytes) override; + }; + } +} + +#endif diff --git a/tests/src/c/memoryPool.cpp b/tests/src/c/memoryPool.cpp new file mode 100644 index 000000000..9555f51fd --- /dev/null +++ b/tests/src/c/memoryPool.cpp @@ -0,0 +1,232 @@ +#define OCCA_DISABLE_VARIADIC_MACROS + +#include +#include +#include +#include +#include + +void testInit(); +void testReserve(); + +int main(const int argc, const char **argv) { + testInit(); + testReserve(); + + return 0; +} + +void testInit() { + occaMemoryPool memPool = occaUndefined; + occaJson props = ( + occaJsonParse("{foo: 'bar'}") + ); + + ASSERT_TRUE(occaIsUndefined(memPool)); + ASSERT_EQ(memPool.type, + OCCA_UNDEFINED); + ASSERT_FALSE(occaMemoryPoolIsInitialized(memPool)); + + memPool = occaCreateMemoryPool(props); + ASSERT_FALSE(occaIsUndefined(memPool)); + ASSERT_EQ(memPool.type, + OCCA_MEMORYPOOL); + ASSERT_TRUE(occaMemoryPoolIsInitialized(memPool)); + + ASSERT_EQ(occa::c::device(occaMemoryPoolGetDevice(memPool)), + occa::host()); + + occaJson memPoolProps = occaMemoryPoolGetProperties(memPool); + occaType memPoolMode = occaJsonObjectGet(memPoolProps, "foo", occaUndefined); + ASSERT_EQ((const char*) occaJsonGetString(memPoolMode), + (const char*) "bar"); + + occaFree(&props); + occaFree(&memPool); +} + +void testReserve() { + #define ASSERT_SAME_SIZE(a, b) \ + ASSERT_EQ((size_t) (a), (size_t) (b)) + + float *data = new float[30]; + float *test = new float[30]; + for (int i = 0; i < 30; ++i) { + data[i] = i; + } + + occaMemoryPool memPool = occaCreateMemoryPool(occaDefault); + + /*Set aligment to 5*sizeof(float) bytes*/ + occaMemoryPoolSetAlignment(memPool, 5 * sizeof(float)); + + /*Set a size for the memoryPool*/ + occaMemoryPoolResize(memPool, 10 * sizeof(float)); + + occaDevice device = occaMemoryPoolGetDevice(memPool); + + ASSERT_SAME_SIZE(occaDeviceMemoryAllocated(device), 10 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolSize(memPool), 10 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolReserved(memPool), 0); + + /*Make a reservation*/ + occaMemory mem = occaMemoryPoolTypedReserve(memPool, 10, occaDtypeFloat); + occaCopyPtrToMem(mem, data, + occaAllBytes, 0, + occaDefault); + + ASSERT_SAME_SIZE(occaDeviceMemoryAllocated(device), 10 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolSize(memPool), 10 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolReserved(memPool), 10 * sizeof(float)); + + { + /*Test slicing mem in memoryPool. Should not trigger reallocation or + increase in memoryPool's reservation size*/ + occaMemory half1 = occaMemorySlice(mem, 0, 5); + occaMemory half2 = occaMemorySlice(mem, 5, occaAllBytes); + + ASSERT_SAME_SIZE(occaDeviceMemoryAllocated(device), 10 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolSize(memPool), 10 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolReserved(memPool), 10 * sizeof(float)); + + occaCopyMemToPtr(test, half1, + occaAllBytes, 0, + occaDefault); + for (int i = 0; i < 5; ++i) { + ASSERT_EQ(static_cast(test[i]), i); + } + occaCopyMemToPtr(test, half2, + occaAllBytes, 0, + occaDefault); + for (int i = 0; i < 5; ++i) { + ASSERT_EQ(static_cast(test[i]), i+5); + } + + /*Trigger resize*/ + occaMemory mem2 = occaMemoryPoolReserve(memPool, 10 * sizeof(float)); + + ASSERT_SAME_SIZE(occaDeviceMemoryAllocated(device), 20 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolSize(memPool), 20 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolReserved(memPool), 20 * sizeof(float)); + + occaCopyMemToPtr(test, mem, + occaAllBytes, 0, + occaDefault); + for (int i = 0; i < 10; ++i) { + ASSERT_EQ(static_cast(test[i]), i); + } + occaCopyMemToPtr(test, half1, + occaAllBytes, 0, + occaDefault); + for (int i = 0; i < 5; ++i) { + ASSERT_EQ(static_cast(test[i]), i); + } + occaCopyMemToPtr(test, half2, + occaAllBytes, 0, + occaDefault); + for (int i = 0; i < 5; ++i) { + ASSERT_EQ(static_cast(test[i]), i+5); + } + + occaFree(&half1); + occaFree(&half2); + occaFree(&mem2); + } + + /*Delete buffers, memoryPool size does not change, but reservation is smaller*/ + ASSERT_SAME_SIZE(occaDeviceMemoryAllocated(device), 20 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolSize(memPool), 20 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolReserved(memPool), 10 * sizeof(float)); + + /*Reserve again, should not trigger a resize*/ + occaMemory mem2 = occaMemoryPoolReserve(memPool, 10 * sizeof(float)); + occaCopyPtrToMem(mem2, data+10, + occaAllBytes, 0, + occaDefault); + + ASSERT_SAME_SIZE(occaDeviceMemoryAllocated(device), 20 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolSize(memPool), 20 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolReserved(memPool), 20 * sizeof(float)); + + /*Trigger resize*/ + occaMemory mem3 = occaMemoryPoolReserve(memPool, 5 * sizeof(float)); + occaMemory mem4 = occaMemoryPoolReserve(memPool, 5 * sizeof(float)); + occaCopyPtrToMem(mem3, data+20, + occaAllBytes, 0, + occaDefault); + occaCopyPtrToMem(mem4, data+25, + occaAllBytes, 0, + occaDefault); + + ASSERT_SAME_SIZE(occaDeviceMemoryAllocated(device), 30 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolSize(memPool), 30 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolReserved(memPool), 30 * sizeof(float)); + + occaCopyMemToPtr(test, mem2, + occaAllBytes, 0, + occaDefault); + for (int i = 0; i < 10; ++i) { + ASSERT_EQ(static_cast(test[i]), i+10); + } + + /*Delete mem and mem3 to make gaps*/ + occaFree(&mem); + occaFree(&mem3); + ASSERT_SAME_SIZE(occaDeviceMemoryAllocated(device), 30 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolSize(memPool), 30 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolReserved(memPool), 15 * sizeof(float)); + + /*Trigger a resize again, which shifts mem2 and mem4 */ + mem = occaMemoryPoolReserve(memPool, 20 * sizeof(float)); + ASSERT_SAME_SIZE(occaDeviceMemoryAllocated(device), 35 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolSize(memPool), 35 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolReserved(memPool), 35 * sizeof(float)); + + occaCopyMemToPtr(test, mem2, + occaAllBytes, 0, + occaDefault); + for (int i = 0; i < 10; ++i) { + ASSERT_EQ(static_cast(test[i]), i+10); + } + occaCopyMemToPtr(test, mem4, + occaAllBytes, 0, + occaDefault); + for (int i = 0; i < 5; ++i) { + ASSERT_EQ(static_cast(test[i]), i+25); + } + + /*Manually free mem2*/ + occaFree(&mem2); + ASSERT_SAME_SIZE(occaDeviceMemoryAllocated(device), 35 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolSize(memPool), 35 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolReserved(memPool), 25 * sizeof(float)); + + /*Shrink pool to fit*/ + occaMemoryPoolShrinkToFit(memPool); + ASSERT_SAME_SIZE(occaDeviceMemoryAllocated(device), 25 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolSize(memPool), 25 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolReserved(memPool), 25 * sizeof(float)); + + occaCopyMemToPtr(test, mem4, + occaAllBytes, 0, + occaDefault); + for (int i = 0; i < 5; ++i) { + ASSERT_EQ(static_cast(test[i]), i+25); + } + + occaFree(&mem4); + ASSERT_SAME_SIZE(occaDeviceMemoryAllocated(device), 25 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolSize(memPool), 25 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolReserved(memPool), 20 * sizeof(float)); + + occaFree(&mem); + ASSERT_SAME_SIZE(occaDeviceMemoryAllocated(device), 25 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolSize(memPool), 25 * sizeof(float)); + ASSERT_SAME_SIZE(occaMemoryPoolReserved(memPool), 0 * sizeof(float)); + + occaFree(&memPool); + ASSERT_SAME_SIZE(occaDeviceMemoryAllocated(device), 0); + + delete[] test; + delete[] data; +} diff --git a/tests/src/core/memoryPool.cpp b/tests/src/core/memoryPool.cpp new file mode 100644 index 000000000..7bc313700 --- /dev/null +++ b/tests/src/core/memoryPool.cpp @@ -0,0 +1,167 @@ +#include +#include + +void testReserve(); + +int main(const int argc, const char **argv) { + testReserve(); + + return 0; +} + +void testReserve() { +#define ASSERT_SAME_SIZE(a, b) \ + ASSERT_EQ((size_t) (a), (size_t) (b)) + + float *data = new float[30]; + float *test = new float[30]; + for (int i = 0; i < 30; ++i) { + data[i] = i; + } + + occa::device device({ + {"mode", "Serial"} + }); + + occa::experimental::memoryPool memPool = device.createMemoryPool(); + + /*Set aligment to 5*sizeof(float) bytes*/ + memPool.setAlignment(5 * sizeof(float)); + + /*Set a size for the memoryPool*/ + memPool.resize(10 * sizeof(float)); + + ASSERT_SAME_SIZE(device.memoryAllocated(), 10 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.size(), 10 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.reserved(), 0); + + /*Make a reservation*/ + occa::memory mem = memPool.reserve(10); + mem.copyFrom(data); + + ASSERT_SAME_SIZE(device.memoryAllocated(), 10 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.size(), 10 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.reserved(), 10 * sizeof(float)); + { + /*Test slicing mem in memoryPool. Should not trigger reallocation or + increase in memoryPool's reservation size*/ + occa::memory half1 = mem.slice(0, 5); + occa::memory half2 = mem.slice(5); + + ASSERT_SAME_SIZE(device.memoryAllocated(), 10 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.size(), 10 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.reserved(), 10 * sizeof(float)); + + half1.copyTo(test); + for (int i = 0; i < 5; ++i) { + ASSERT_EQ(static_cast(test[i]), i); + } + half2.copyTo(test); + for (int i = 0; i < 5; ++i) { + ASSERT_EQ(static_cast(test[i]), i+5); + } + + /*Trigger resize*/ + occa::memory mem2 = memPool.reserve(10); + + ASSERT_SAME_SIZE(device.memoryAllocated(), 20 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.size(), 20 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.reserved(), 20 * sizeof(float)); + + mem.copyTo(test); + for (int i = 0; i < 10; ++i) { + ASSERT_EQ(static_cast(test[i]), i); + } + half1.copyTo(test); + for (int i = 0; i < 5; ++i) { + ASSERT_EQ(static_cast(test[i]), i); + } + half2.copyTo(test); + for (int i = 0; i < 5; ++i) { + ASSERT_EQ(static_cast(test[i]), i+5); + } + } + + /*Delete buffers, memoryPool size does not change, but reservation is smaller*/ + ASSERT_SAME_SIZE(device.memoryAllocated(), 20 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.size(), 20 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.reserved(), 10 * sizeof(float)); + + /*Reserve again, should not trigger a resize*/ + occa::memory mem2 = memPool.reserve(10); + mem2.copyFrom(data+10); + + ASSERT_SAME_SIZE(device.memoryAllocated(), 20 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.size(), 20 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.reserved(), 20 * sizeof(float)); + + /*Trigger resize*/ + occa::memory mem3 = memPool.reserve(5); + occa::memory mem4 = memPool.reserve(5); + mem3.copyFrom(data+20); + mem4.copyFrom(data+25); + + ASSERT_SAME_SIZE(device.memoryAllocated(), 30 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.size(), 30 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.reserved(), 30 * sizeof(float)); + + mem2.copyTo(test); + for (int i = 0; i < 10; ++i) { + ASSERT_EQ(static_cast(test[i]), i+10); + } + + /*Delete mem and mem3 to make gaps*/ + mem.free(); + mem3.free(); + ASSERT_SAME_SIZE(device.memoryAllocated(), 30 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.size(), 30 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.reserved(), 15 * sizeof(float)); + + /*Trigger a resize again, which shifts mem2 and mem4 */ + mem = memPool.reserve(20); + ASSERT_SAME_SIZE(device.memoryAllocated(), 35 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.size(), 35 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.reserved(), 35 * sizeof(float)); + + mem2.copyTo(test); + for (int i = 0; i < 10; ++i) { + ASSERT_EQ(static_cast(test[i]), i+10); + } + mem4.copyTo(test); + for (int i = 0; i < 5; ++i) { + ASSERT_EQ(static_cast(test[i]), i+25); + } + + /*Manually free mem2*/ + mem2.free(); + ASSERT_SAME_SIZE(device.memoryAllocated(), 35 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.size(), 35 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.reserved(), 25 * sizeof(float)); + + /*Shrink pool to fit*/ + memPool.resize(memPool.reserved()); + ASSERT_SAME_SIZE(device.memoryAllocated(), 25 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.size(), 25 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.reserved(), 25 * sizeof(float)); + + mem4.copyTo(test); + for (int i = 0; i < 5; ++i) { + ASSERT_EQ(static_cast(test[i]), i+25); + } + + mem4.free(); + ASSERT_SAME_SIZE(device.memoryAllocated(), 25 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.size(), 25 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.reserved(), 20 * sizeof(float)); + + mem.free(); + ASSERT_SAME_SIZE(device.memoryAllocated(), 25 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.size(), 25 * sizeof(float)); + ASSERT_SAME_SIZE(memPool.reserved(), 0 * sizeof(float)); + + memPool.free(); + ASSERT_SAME_SIZE(device.memoryAllocated(), 0); + + delete[] test; + delete[] data; +} From cdcd1bd1c3535e430541a5339d18fdcd0f926657 Mon Sep 17 00:00:00 2001 From: Noel Chalmers Date: Tue, 18 Oct 2022 09:16:18 -0500 Subject: [PATCH 17/23] [Dtypes] Add support for std::byte dtype --- include/occa/dtype/builtins.hpp | 3 ++- include/occa/types/json.hpp | 2 +- src/c/dtype.cpp | 2 +- src/core/base.cpp | 4 ++-- src/core/device.cpp | 8 ++++---- src/dtype/builtins.cpp | 6 +++++- src/dtype/dtype.cpp | 6 +++--- src/occa/internal/core/memory.cpp | 2 +- src/occa/internal/lang/kernelMetadata.cpp | 2 +- src/occa/internal/lang/type/class.cpp | 2 +- src/occa/internal/lang/type/enum.cpp | 2 +- src/occa/internal/lang/type/function.cpp | 2 +- src/occa/internal/lang/type/functionPtr.cpp | 2 +- src/occa/internal/lang/type/lambda.cpp | 2 +- src/occa/internal/lang/type/union.cpp | 2 +- tests/src/dtype.cpp | 8 ++++---- 16 files changed, 30 insertions(+), 25 deletions(-) diff --git a/include/occa/dtype/builtins.hpp b/include/occa/dtype/builtins.hpp index 3de51e5aa..85a0bf28c 100644 --- a/include/occa/dtype/builtins.hpp +++ b/include/occa/dtype/builtins.hpp @@ -15,7 +15,7 @@ namespace occa { extern const dtype_t none; extern const dtype_t void_; - extern const dtype_t byte; + extern const dtype_t byte_; extern const dtype_t bool_; extern const dtype_t char_; @@ -105,6 +105,7 @@ namespace occa { // Primitive types template <> dtype_t get(); template <> dtype_t get(); + template <> dtype_t get(); template <> dtype_t get(); template <> dtype_t get(); template <> dtype_t get(); diff --git a/include/occa/types/json.hpp b/include/occa/types/json.hpp index fa78fa1f8..b76faaad7 100644 --- a/include/occa/types/json.hpp +++ b/include/occa/types/json.hpp @@ -521,7 +521,7 @@ namespace occa { inline dtype_t dtype() const { switch (type) { case null_: - return dtype::byte; + return dtype::byte_; case number_: return value_.number.dtype(); default: diff --git a/src/c/dtype.cpp b/src/c/dtype.cpp index b4e5c329d..6c337b816 100644 --- a/src/c/dtype.cpp +++ b/src/c/dtype.cpp @@ -82,7 +82,7 @@ occaJson occaDtypeToJson(occaDtype dtype) { const occaDtype occaDtypeNone = occa::c::newOccaType(occa::dtype::none); const occaDtype occaDtypeVoid = occa::c::newOccaType(occa::dtype::void_); -const occaDtype occaDtypeByte = occa::c::newOccaType(occa::dtype::byte); +const occaDtype occaDtypeByte = occa::c::newOccaType(occa::dtype::byte_); const occaDtype occaDtypeBool = occa::c::newOccaType(occa::dtype::bool_); const occaDtype occaDtypeChar = occa::c::newOccaType(occa::dtype::char_); diff --git a/src/core/base.cpp b/src/core/base.cpp index 784dfc295..ce11acb44 100644 --- a/src/core/base.cpp +++ b/src/core/base.cpp @@ -117,7 +117,7 @@ namespace occa { occa::memory malloc(const dim_t entries, const void *src, const occa::json &props) { - return getDevice().malloc(entries, dtype::byte, src, props); + return getDevice().malloc(entries, dtype::byte_, src, props); } occa::memory wrapMemory(const void *ptr, @@ -131,7 +131,7 @@ namespace occa { occa::memory wrapMemory(const void *ptr, const dim_t entries, const occa::json &props) { - return getDevice().wrapMemory(ptr, entries, dtype::byte, props); + return getDevice().wrapMemory(ptr, entries, dtype::byte_, props); } void memcpy(memory dest, const void *src, diff --git a/src/core/device.cpp b/src/core/device.cpp index a11c71612..438999bce 100644 --- a/src/core/device.cpp +++ b/src/core/device.cpp @@ -482,27 +482,27 @@ namespace occa { memory device::malloc(const dim_t entries, const void *src, const occa::json &props) { - return malloc(entries, dtype::byte, src, props); + return malloc(entries, dtype::byte_, src, props); } template <> memory device::malloc(const dim_t entries, const occa::memory src, const occa::json &props) { - return malloc(entries, dtype::byte, src, props); + return malloc(entries, dtype::byte_, src, props); } template <> memory device::malloc(const dim_t entries, const occa::json &props) { - return malloc(entries, dtype::byte, NULL, props); + return malloc(entries, dtype::byte_, NULL, props); } template <> occa::memory device::wrapMemory(const void *ptr, const dim_t entries, const occa::json &props) { - return wrapMemory(ptr, entries, dtype::byte, props); + return wrapMemory(ptr, entries, dtype::byte_, props); } occa::memory device::wrapMemory(const void *ptr, diff --git a/src/dtype/builtins.cpp b/src/dtype/builtins.cpp index 48174b4b8..646edfc36 100644 --- a/src/dtype/builtins.cpp +++ b/src/dtype/builtins.cpp @@ -5,7 +5,7 @@ namespace occa { const dtype_t none("none", 0, true); const dtype_t void_("void", 0, true); - const dtype_t byte("byte", 1, true); + const dtype_t byte_("byte", sizeof(std::byte), true); const dtype_t bool_("bool", sizeof(bool), true); const dtype_t char_("char", sizeof(char), true); @@ -78,6 +78,10 @@ namespace occa { return bool_; } + template <> dtype_t get() { + return byte_; + } + template <> dtype_t get() { return char_; } diff --git a/src/dtype/dtype.cpp b/src/dtype/dtype.cpp index 1e61889b5..71d2325bb 100644 --- a/src/dtype/dtype.cpp +++ b/src/dtype/dtype.cpp @@ -248,8 +248,8 @@ namespace occa { const dtype_t &to = other.self(); // Anything can be casted from/to bytes - if ((&from == &dtype::byte) || - (&to == &dtype::byte)) { + if ((&from == &dtype::byte_) || + (&to == &dtype::byte_)) { return true; } @@ -322,7 +322,7 @@ namespace occa { dtypeMap["none"] = &dtype::none; dtypeMap["void"] = &dtype::void_; - dtypeMap["byte"] = &dtype::byte; + dtypeMap["byte"] = &dtype::byte_; dtypeMap["bool"] = &dtype::bool_; dtypeMap["char"] = &dtype::char_; diff --git a/src/occa/internal/core/memory.cpp b/src/occa/internal/core/memory.cpp index 4f98132ad..395769f7a 100644 --- a/src/occa/internal/core/memory.cpp +++ b/src/occa/internal/core/memory.cpp @@ -8,7 +8,7 @@ namespace occa { udim_t size_, dim_t offset_) : modeBuffer(modeBuffer_), ptr(NULL), - dtype_(&dtype::byte), + dtype_(&dtype::byte_), size(size_), offset(offset_) { modeBuffer->addModeMemoryRef(this); diff --git a/src/occa/internal/lang/kernelMetadata.cpp b/src/occa/internal/lang/kernelMetadata.cpp index fc44875d7..07ba43477 100644 --- a/src/occa/internal/lang/kernelMetadata.cpp +++ b/src/occa/internal/lang/kernelMetadata.cpp @@ -7,7 +7,7 @@ namespace occa { argMetadata_t::argMetadata_t() : isConst(false), isPtr(false), - dtype(dtype::byte) {} + dtype(dtype::byte_) {} argMetadata_t::argMetadata_t(const bool isConst_, const bool isPtr_, diff --git a/src/occa/internal/lang/type/class.cpp b/src/occa/internal/lang/type/class.cpp index f7217f578..f215b9aee 100644 --- a/src/occa/internal/lang/type/class.cpp +++ b/src/occa/internal/lang/type/class.cpp @@ -14,7 +14,7 @@ namespace occa { } dtype_t class_t::dtype() const { - return dtype::byte; + return dtype::byte_; } void class_t::printDeclaration(printer &pout) const { diff --git a/src/occa/internal/lang/type/enum.cpp b/src/occa/internal/lang/type/enum.cpp index 97e1394e2..09709d415 100644 --- a/src/occa/internal/lang/type/enum.cpp +++ b/src/occa/internal/lang/type/enum.cpp @@ -14,7 +14,7 @@ namespace occa { } dtype_t enum_t::dtype() const { - return dtype::byte; + return dtype::byte_; } void enum_t::printDeclaration(printer &pout) const { diff --git a/src/occa/internal/lang/type/function.cpp b/src/occa/internal/lang/type/function.cpp index 402340e03..385579613 100644 --- a/src/occa/internal/lang/type/function.cpp +++ b/src/occa/internal/lang/type/function.cpp @@ -46,7 +46,7 @@ namespace occa { } dtype_t function_t::dtype() const { - return dtype::byte; + return dtype::byte_; } void function_t::addArgument(const variable_t &arg) { diff --git a/src/occa/internal/lang/type/functionPtr.cpp b/src/occa/internal/lang/type/functionPtr.cpp index 7da3dcf9d..33eb6c5fc 100644 --- a/src/occa/internal/lang/type/functionPtr.cpp +++ b/src/occa/internal/lang/type/functionPtr.cpp @@ -50,7 +50,7 @@ namespace occa { } dtype_t functionPtr_t::dtype() const { - return dtype::byte; + return dtype::byte_; } bool functionPtr_t::equals(const type_t &other) const { diff --git a/src/occa/internal/lang/type/lambda.cpp b/src/occa/internal/lang/type/lambda.cpp index 600adcd45..d9fa86bdf 100644 --- a/src/occa/internal/lang/type/lambda.cpp +++ b/src/occa/internal/lang/type/lambda.cpp @@ -56,7 +56,7 @@ lambda_t::lambda_t(capture_t capture_,const blockStatement& body_) dtype_t lambda_t::dtype() const { - return dtype::byte; + return dtype::byte_; } bool lambda_t::equals(const type_t &other) const diff --git a/src/occa/internal/lang/type/union.cpp b/src/occa/internal/lang/type/union.cpp index 32a01d9b0..8538be26f 100644 --- a/src/occa/internal/lang/type/union.cpp +++ b/src/occa/internal/lang/type/union.cpp @@ -14,7 +14,7 @@ namespace occa { } dtype_t union_t::dtype() const { - return dtype::byte; + return dtype::byte_; } void union_t::printDeclaration(printer &pout) const { diff --git a/tests/src/dtype.cpp b/tests/src/dtype.cpp index fc9771472..b940d465c 100644 --- a/tests/src/dtype.cpp +++ b/tests/src/dtype.cpp @@ -102,16 +102,16 @@ void testCasting() { // double <---> byte // double2 <---> byte ASSERT_TRUE( - occa::dtype::double_.canBeCastedTo(occa::dtype::byte) + occa::dtype::double_.canBeCastedTo(occa::dtype::byte_) ); ASSERT_TRUE( - occa::dtype::byte.canBeCastedTo(occa::dtype::double_) + occa::dtype::byte_.canBeCastedTo(occa::dtype::double_) ); ASSERT_TRUE( - occa::dtype::double2.canBeCastedTo(occa::dtype::byte) + occa::dtype::double2.canBeCastedTo(occa::dtype::byte_) ); ASSERT_TRUE( - occa::dtype::byte.canBeCastedTo(occa::dtype::double2) + occa::dtype::byte_.canBeCastedTo(occa::dtype::double2) ); } From 05c43e53ea15d99629e96d2eee52b6663729e446 Mon Sep 17 00:00:00 2001 From: Noel Chalmers Date: Wed, 19 Oct 2022 10:43:03 -0500 Subject: [PATCH 18/23] First steps for making a user-dtype registry, and make the byte dtype safe for c++11 --- include/occa/dtype/builtins.hpp | 42 +++++++++++++++++++++++++++++---- include/occa/dtype/dtype.hpp | 11 +++++++++ src/dtype/builtins.cpp | 23 +++++++++++++----- src/dtype/dtype.cpp | 15 ++++++++++-- 4 files changed, 78 insertions(+), 13 deletions(-) diff --git a/include/occa/dtype/builtins.hpp b/include/occa/dtype/builtins.hpp index 85a0bf28c..afa517c6a 100644 --- a/include/occa/dtype/builtins.hpp +++ b/include/occa/dtype/builtins.hpp @@ -5,11 +5,14 @@ #include #include #include +#include +#include namespace occa { class memory; typedef std::vector dtypeVector; + typedef std::unordered_map dtypeMap; namespace dtype { extern const dtype_t none; @@ -75,16 +78,42 @@ namespace occa { extern const dtype_t double3; extern const dtype_t double4; + extern const dtype_t ptr; + // OCCA Types extern const dtype_t memory; + // User type registry + extern dtypeMap registry; + // Templated types - template + template dtype_t get() { - if (!typeMetadata::isPointer) { - return none; + if (std::is_pointer::value) { + return dtype::ptr; + } + + using T = typename std::decay::type; + if (!std::is_same::value) { + return dtype::get(); + } + + auto it = registry.find(typeid(T).hash_code()); + if (it != registry.end()) { + return it->second; + } else { + static_assert(std::is_trivial::value + || std::is_standard_layout::value, + "Cannot register types that are not POD structs"); + auto entry = registry.emplace( + std::piecewise_construct, + std::forward_as_tuple(typeid(T).hash_code()), + std::forward_as_tuple(typeid(T).name(), + dtype_t::tuple(byte_, sizeof(T)), + true) + ); + return entry.first->second; } - return get::baseType>(); } template @@ -105,7 +134,6 @@ namespace occa { // Primitive types template <> dtype_t get(); template <> dtype_t get(); - template <> dtype_t get(); template <> dtype_t get(); template <> dtype_t get(); template <> dtype_t get(); @@ -140,6 +168,10 @@ namespace occa { template <> dtype_t get(); template <> dtype_t get(); +#if __cplusplus >= 201703L + template <> dtype_t get(); +#endif + // OCCA Types template <> dtype_t get(); } diff --git a/include/occa/dtype/dtype.hpp b/include/occa/dtype/dtype.hpp index 6fce760ac..9857c141c 100644 --- a/include/occa/dtype/dtype.hpp +++ b/include/occa/dtype/dtype.hpp @@ -153,6 +153,17 @@ namespace occa { */ const strVector& structFieldNames() const; + /** + * @startDoc{baseDtype} + * + * Description: + * Return the base dtype of this type. For example + * `int2` would return the 'int' dtype. + * + * @endDoc + */ + const dtype_t& baseDtype() const; + /** * @startDoc{operator_bracket[0]} * diff --git a/src/dtype/builtins.cpp b/src/dtype/builtins.cpp index 646edfc36..f6a33e5de 100644 --- a/src/dtype/builtins.cpp +++ b/src/dtype/builtins.cpp @@ -3,9 +3,7 @@ namespace occa { namespace dtype { const dtype_t none("none", 0, true); - const dtype_t void_("void", 0, true); - const dtype_t byte_("byte", sizeof(std::byte), true); const dtype_t bool_("bool", sizeof(bool), true); const dtype_t char_("char", sizeof(char), true); @@ -65,9 +63,20 @@ namespace occa { const dtype_t double3("double3", dtype_t::tuple(double_, 3), true); const dtype_t double4("double4", dtype_t::tuple(double_, 4), true); + const dtype_t ptr("pointer", sizeof(void*), true); + +#if __cplusplus >= 201703L + const dtype_t byte_("byte", sizeof(std::byte), true); +#else + const dtype_t byte_("byte", 1, true); +#endif + // OCCA Types const dtype_t memory("occa::memory", 0, true); + // User type registry + dtypeMap registry; + // Templated types template <> dtype_t get() { return void_; @@ -78,10 +87,6 @@ namespace occa { return bool_; } - template <> dtype_t get() { - return byte_; - } - template <> dtype_t get() { return char_; } @@ -218,5 +223,11 @@ namespace occa { template <> dtype_t get() { return memory; } + +#if __cplusplus >= 201703L + template <> dtype_t get() { + return byte_; + } +#endif } } diff --git a/src/dtype/dtype.cpp b/src/dtype/dtype.cpp index 71d2325bb..7b47d1481 100644 --- a/src/dtype/dtype.cpp +++ b/src/dtype/dtype.cpp @@ -136,6 +136,14 @@ namespace occa { return structPtr->fieldNames; } + const dtype_t& dtype_t::baseDtype() const { + if (self().tuple_) { + return self().tuple_->dtype.self(); + } else { + return self(); + } + } + const dtype_t& dtype_t::operator [] (const int field) const { const dtypeStruct_t *structPtr = self().struct_; OCCA_ERROR("Cannot access fields from a non-struct dtype_t", @@ -248,8 +256,11 @@ namespace occa { const dtype_t &to = other.self(); // Anything can be casted from/to bytes - if ((&from == &dtype::byte_) || - (&to == &dtype::byte_)) { + const dtype_t &fromBase = from.baseDtype(); + const dtype_t &toBase = to.baseDtype(); + + if ((&fromBase == &dtype::byte_) || + (&toBase == &dtype::byte_)) { return true; } From e523bbe28f2b66e8df220dc5c2d66e18b5f80b14 Mon Sep 17 00:00:00 2001 From: Noel Chalmers Date: Wed, 19 Oct 2022 16:06:15 -0500 Subject: [PATCH 19/23] Add some new methods to register and deregister dtypes through templated methods --- include/occa/dtype/builtins.hpp | 6 --- include/occa/dtype/dtype.hpp | 91 ++++++++++++++++++++++++++++++++- src/dtype/builtins.cpp | 3 -- src/dtype/dtype.cpp | 7 ++- 4 files changed, 95 insertions(+), 12 deletions(-) diff --git a/include/occa/dtype/builtins.hpp b/include/occa/dtype/builtins.hpp index afa517c6a..12c543c91 100644 --- a/include/occa/dtype/builtins.hpp +++ b/include/occa/dtype/builtins.hpp @@ -5,14 +5,11 @@ #include #include #include -#include -#include namespace occa { class memory; typedef std::vector dtypeVector; - typedef std::unordered_map dtypeMap; namespace dtype { extern const dtype_t none; @@ -83,9 +80,6 @@ namespace occa { // OCCA Types extern const dtype_t memory; - // User type registry - extern dtypeMap registry; - // Templated types template dtype_t get() { diff --git a/include/occa/dtype/dtype.hpp b/include/occa/dtype/dtype.hpp index 9857c141c..1c077be93 100644 --- a/include/occa/dtype/dtype.hpp +++ b/include/occa/dtype/dtype.hpp @@ -4,7 +4,11 @@ #include #include #include +#include +#include +#include +#include #include @@ -48,7 +52,7 @@ namespace occa { dtype_t(); dtype_t(const std::string &name__, - const int bytes__ = 0, + const int bytes__, const bool registered_ = false); dtype_t(const std::string &name__, @@ -326,6 +330,91 @@ namespace occa { std::string toString(const std::string &varName = "") const; }; //==================================== + + namespace dtype { + + typedef std::unordered_map dtypeMap; + + // User type registry + extern dtypeMap registry; + + template + dtype_t registerType(const std::string &name__, + std::vector fieldNames, + std::vector fieldTypes) { + + static_assert(!std::is_pointer::value, + "Cannot register pointer types"); + + OCCA_ERROR("Must have same number of field names and types", + fieldNames.size() == fieldTypes.size()); + OCCA_ERROR("Must have a positive integer number of fields", + fieldNames.size() > 0); + + using T = typename std::decay::type; + + auto it = registry.find(typeid(T).hash_code()); + + OCCA_ERROR("Type " << name__ << "[ i.e. " << typeid(T).name() + << " ] already registered.", + it == registry.end()); + + static_assert(std::is_trivial::value + || std::is_standard_layout::value, + "Cannot register types that are not POD structs"); + auto entry = registry.emplace( + std::piecewise_construct, + std::forward_as_tuple(typeid(T).hash_code()), + std::forward_as_tuple(name__, sizeof(T), true) + ); + dtype_t& type = entry.first->second; + + for (int i = 0; i < fieldNames.size(); ++i) { + type.addField(fieldNames[i], fieldTypes[i]); + } + + return type; + } + + template + dtype_t registerType(const std::string &name__, + const dtype_t &dtype, + const int size=1) { + + static_assert(!std::is_pointer::value, + "Cannot register pointer types"); + + OCCA_ERROR("Tuple must have a positive integer size", + size > 0); + + using T = typename std::decay::type; + + auto it = registry.find(typeid(T).hash_code()); + + OCCA_ERROR("Type " << name__ << "[ i.e. " << typeid(T).name() + << " ] already registered.", + it == registry.end()); + + static_assert(std::is_trivial::value + || std::is_standard_layout::value, + "Cannot register types that are not POD structs"); + auto entry = registry.emplace( + std::piecewise_construct, + std::forward_as_tuple(typeid(T).hash_code()), + std::forward_as_tuple(name__, + dtype_t::tuple(dtype, size), + true) + ); + return entry.first->second; + } + + template + void deRegisterType() { + using T = typename std::decay::type; + registry.erase(typeid(T).hash_code()); + } + + } } #endif diff --git a/src/dtype/builtins.cpp b/src/dtype/builtins.cpp index f6a33e5de..74b2fb0b4 100644 --- a/src/dtype/builtins.cpp +++ b/src/dtype/builtins.cpp @@ -74,9 +74,6 @@ namespace occa { // OCCA Types const dtype_t memory("occa::memory", 0, true); - // User type registry - dtypeMap registry; - // Templated types template <> dtype_t get() { return void_; diff --git a/src/dtype/dtype.cpp b/src/dtype/dtype.cpp index 7b47d1481..e5e0f2d1a 100644 --- a/src/dtype/dtype.cpp +++ b/src/dtype/dtype.cpp @@ -6,6 +6,11 @@ #include namespace occa { + namespace dtype { + // User type registry + dtypeMap registry; + } + //---[ Dtype_T ]------------------------ dtype_t::dtype_t() : ref(NULL), @@ -172,8 +177,6 @@ namespace occa { struct_ = new dtypeStruct_t(); } - bytes_ += (dtype.bytes_ * tupleSize_); - if (tupleSize_ == 1) { struct_->addField(field, dtype); } else { From 9cbd0a0bffad3a643d1b6409fc09405ac19c4270 Mon Sep 17 00:00:00 2001 From: Noel Chalmers Date: Wed, 19 Oct 2022 16:06:50 -0500 Subject: [PATCH 20/23] Add some example of templated methods to example 05 --- examples/cpp/05_custom_types/main.cpp | 64 ++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 2 deletions(-) diff --git a/examples/cpp/05_custom_types/main.cpp b/examples/cpp/05_custom_types/main.cpp index 58494ddac..5f49d63ae 100644 --- a/examples/cpp/05_custom_types/main.cpp +++ b/examples/cpp/05_custom_types/main.cpp @@ -62,10 +62,10 @@ int main(int argc, const char **argv) { myFloatDtype.registerType(); // Struct dtype - occa::dtype_t myFloat2Dtype; - myFloat2Dtype.registerType(); + occa::dtype_t myFloat2Dtype("myFloat2", sizeof(myFloat2)); myFloat2Dtype.addField("x", occa::dtype::float_); myFloat2Dtype.addField("y", occa::dtype::float_); + myFloat2Dtype.registerType(); // Tuple dtype occa::dtype_t myFloat4Dtype = occa::dtype_t::tuple(occa::dtype::float_, 4); @@ -108,6 +108,66 @@ int main(int argc, const char **argv) { } } + o_a.free(); + o_b.free(); + o_ab.free(); + + // Templated dtypes + // - occa::memory dtypes can be created and assigned via templated methods + // - dtypes registered this way are tracked internally and destroyed when deregistered + // - Registered dtype_t objects are treated as singletons and assumed + // to exist while the memory objects are still alive + + // Automatic creation + // Using a type that has not been registered with OCCA will be + // automatically registered as a byte field (thus all type-checking is disabled) + o_a = occa::malloc(entries); + + // Struct dtype + occa::dtype::registerType("myFloat2", + {"x", "y"}, + {occa::dtype::float_, occa::dtype::float_}); + + o_b = occa::malloc(entries / 2); + + // Tuple dtype + occa::dtype::registerType("myFloat4", occa::dtype::float_, 4); + + o_ab = occa::malloc(entries / 4); + + // Copy memory to the device + o_a.copyFrom(a); + o_b.copyFrom(b); + + // Launch device kernel + addVectors(entries, + o_a.cast(occa::dtype::float_), + o_b, + o_ab); + + // Copy result to the host + o_ab.copyTo(ab); + + // Assert values + for (int i = 0; i < (entries / 4); ++i) { + for (int j = 0; j < 4; ++j) { + std::cout << '(' << i << ',' << j << ") : " << ab[i].values[j] << '\n'; + } + } + for (int i = 0; i < entries; ++i) { + float a_i = a[i].value; + float b_i = (i % 2) ? b[i / 2].y : b[i / 2].x; + float ab_i = ab[i / 4].values[i % 4]; + if (!occa::areBitwiseEqual(ab_i, a_i + b_i)) { + throw 1; + } + } + + // Can deregister templated dtypes if they're no longer needed + occa::dtype::deRegisterType(); + occa::dtype::deRegisterType(); + occa::dtype::deRegisterType(); + // Free host memory delete [] a; delete [] b; From a4110586260e711e0ef38c419f5207979cff17dc Mon Sep 17 00:00:00 2001 From: Noel Chalmers Date: Wed, 19 Oct 2022 16:07:12 -0500 Subject: [PATCH 21/23] Improve dtype tests to cover new methods --- tests/src/dtype.cpp | 83 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 76 insertions(+), 7 deletions(-) diff --git a/tests/src/dtype.cpp b/tests/src/dtype.cpp index b940d465c..0162d8599 100644 --- a/tests/src/dtype.cpp +++ b/tests/src/dtype.cpp @@ -33,24 +33,48 @@ void testDtype() { ASSERT_NEQ(occa::dtype::double_, fakeDouble); - occa::dtype_t foo1("foo"); + occa::dtype_t foo1("foo", occa::dtype::double_.bytes()); foo1.addField("a", occa::dtype::double_); - occa::dtype_t foo2("foo"); + occa::dtype_t foo2("foo", occa::dtype::double_.bytes()); foo2.addField("a", occa::dtype::double_); - occa::dtype_t foo3("foo"); + occa::dtype_t foo3("foo", 2 * occa::dtype::double_.bytes()); foo3.addField("a", occa::dtype::double_) .addField("b", occa::dtype::double_); - occa::dtype_t foo4("foo"); + occa::dtype_t foo4("foo", 2 * occa::dtype::double_.bytes()); foo4.addField("b", occa::dtype::double_) .addField("a", occa::dtype::double_); + struct myStruct { + double a; + double b; + }; + + struct myStruct2 { + double a[2]; + }; + + occa::dtype::registerType("myStruct", + {"a", "b"}, + {occa::dtype::double_, + occa::dtype::double_}); + + occa::dtype_t foo5 = occa::dtype::get(); + + occa::dtype::registerType("myStruct2", + occa::dtype::double_, + 2); + + occa::dtype_t foo6 = occa::dtype::get(); + ASSERT_EQ(foo1, foo1); ASSERT_NEQ(foo1, foo2); ASSERT_NEQ(foo1, foo3); ASSERT_NEQ(foo1, foo4); + ASSERT_NEQ(foo1, foo5); + ASSERT_NEQ(foo1, foo6); ASSERT_NEQ(foo3, foo4); ASSERT_TRUE(foo1.matches(foo1)); @@ -58,21 +82,53 @@ void testDtype() { ASSERT_FALSE(foo1.matches(foo3)); ASSERT_FALSE(foo1.matches(foo4)); ASSERT_FALSE(foo3.matches(foo4)); + ASSERT_FALSE(foo3.matches(foo5)); + ASSERT_FALSE(foo5.matches(foo6)); + + occa::dtype::deRegisterType(); + occa::dtype::deRegisterType(); } void testCasting() { - occa::dtype_t foo1("foo"); + occa::dtype_t foo1("foo", 2 * occa::dtype::double_.bytes()); foo1.addField("a", occa::dtype::double_) .addField("b", occa::dtype::double_); - occa::dtype_t foo2("foo"); + occa::dtype_t foo2("foo", 2 * occa::dtype::double_.bytes()); foo2.addField("b", occa::dtype::double_) .addField("a", occa::dtype::double_); + struct myStruct { + double a; + double b; + }; + + struct myStruct2 { + double a[2]; + }; + + occa::dtype::registerType("myStruct", + {"a", "b"}, + {occa::dtype::double_, + occa::dtype::double_}); + + occa::dtype_t foo5 = occa::dtype::get(); + + occa::dtype::registerType("myStruct2", + occa::dtype::double_, + 2); + + occa::dtype_t foo6 = occa::dtype::get(); + ASSERT_NEQ(foo1, foo2); ASSERT_FALSE(foo1.matches(foo2)); ASSERT_TRUE(foo1.canBeCastedTo(foo2)); ASSERT_TRUE(foo2.canBeCastedTo(foo1)); + ASSERT_TRUE(foo2.canBeCastedTo(foo5)); + ASSERT_TRUE(foo2.canBeCastedTo(foo6)); + + occa::dtype::deRegisterType(); + occa::dtype::deRegisterType(); // double <---> double2 ASSERT_NEQ(occa::dtype::double_, @@ -128,13 +184,26 @@ void testGet() { types[1]); ASSERT_EQ(occa::dtype::int_, types[2]); + + struct myStruct { + double a; + double b; + }; + + occa::dtype_t foo = occa::dtype::get(); + ASSERT_EQ(occa::dtype::byte_, + foo.baseDtype()); + ASSERT_EQ(2 * occa::dtype::double_.bytes(), + foo.bytes()); + + occa::dtype::deRegisterType(); } void testJsonMethods() { ASSERT_EQ(occa::dtype::toJson(occa::dtype::double_).toString(), occa::json::parse("{ type: 'builtin', name: 'double' }").toString()); - occa::dtype_t foo("foo"); + occa::dtype_t foo("foo", 2 * occa::dtype::double_.bytes()); foo.addField("a", occa::dtype::double_) .addField("b", occa::dtype::double_); From 13d3d4aa018e6afb201956a7af154bf7f595580f Mon Sep 17 00:00:00 2001 From: Noel Chalmers Date: Mon, 31 Oct 2022 09:02:32 -0500 Subject: [PATCH 22/23] Fix a byte dtype name --- src/core/memoryPool.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/memoryPool.cpp b/src/core/memoryPool.cpp index cab3f988f..3e5fb3e3f 100644 --- a/src/core/memoryPool.cpp +++ b/src/core/memoryPool.cpp @@ -172,7 +172,7 @@ namespace occa { template <> memory memoryPool::reserve(const dim_t entries) { - return reserve(entries, dtype::byte); + return reserve(entries, dtype::byte_); } void memoryPool::setAlignment(const udim_t alignment) { From f63ef377a8dc1565277fa0c838f3d7b21015a021 Mon Sep 17 00:00:00 2001 From: Noel Chalmers Date: Mon, 31 Oct 2022 09:03:28 -0500 Subject: [PATCH 23/23] Fix a signed comparison --- include/occa/dtype/dtype.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/occa/dtype/dtype.hpp b/include/occa/dtype/dtype.hpp index 1c077be93..743c49e16 100644 --- a/include/occa/dtype/dtype.hpp +++ b/include/occa/dtype/dtype.hpp @@ -369,7 +369,7 @@ namespace occa { ); dtype_t& type = entry.first->second; - for (int i = 0; i < fieldNames.size(); ++i) { + for (size_t i = 0; i < fieldNames.size(); ++i) { type.addField(fieldNames[i], fieldTypes[i]); }