From 4bfd7ce5164fd8671234bb821696b64a4dc464d8 Mon Sep 17 00:00:00 2001 From: "Daniel N. Baker" Date: Wed, 8 Dec 2021 08:30:26 -0500 Subject: [PATCH 1/2] Switch from C-Python API to Pybind11 to use one version for all Python versions --- .gitmodules | 3 ++ fht-pybind11.cpp | 116 +++++++++++++++++++++++++++++++++++++++++++++++ pybind11 | 1 + setup.py | 12 ++--- 4 files changed, 123 insertions(+), 9 deletions(-) create mode 100644 .gitmodules create mode 100644 fht-pybind11.cpp create mode 160000 pybind11 diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..2726187 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "pybind11"] + path = pybind11 + url = https://github.com/pybind/pybind11 diff --git a/fht-pybind11.cpp b/fht-pybind11.cpp new file mode 100644 index 0000000..3380b53 --- /dev/null +++ b/fht-pybind11.cpp @@ -0,0 +1,116 @@ +#include "pybind11/pybind11.h" +#include "pybind11/numpy.h" +#include "fht.h" + +namespace py = pybind11; + +static constexpr inline unsigned ilog2(size_t x) noexcept { + return __builtin_ctz(x); +} + +template +py::array_t npfht_oop(py::array_t input) { + auto bi = input.request(); + if(bi.ndim != 1) throw std::invalid_argument("input must be 1-d array"); + const size_t n = bi.size; + if(!n || (n & (n - 1)) != 0u) { + throw std::invalid_argument("Input must be power-of-2 sized"); + } + if(sizeof(T) != bi.itemsize) { + std::fprintf(stderr, "Error: Corrupted NumPy array (itemsize does not match data type).\n"); + throw std::runtime_error("Error: Corrupted NumPy array (itemsize does not match data type)."); + } + py::array_t ret(n); + py::buffer_info rbi = ret.request(); + if(::fht((T *)bi.ptr, (T *)rbi.ptr, ilog2(n))) { + throw std::runtime_error("FHT did not work properly"); + } + return ret; +} + +template +py::array_t npfht_ip(py::array_t input) { + auto bi = input.request(); + T *ptr = (T *)bi.ptr; + if(bi.ndim != 1) throw std::invalid_argument("input must be 1-d array"); + const size_t n = bi.size; + if(!n || (n & (n - 1)) != 0u) { + throw std::invalid_argument("Input must be power-of-2 sized"); + } + if(sizeof(T) != bi.itemsize) { + std::fprintf(stderr, "Error: Corrupted NumPy array (itemsize does not match data type).\n"); + throw std::runtime_error("Error: Corrupted NumPy array (itemsize does not match data type)."); + } + if(::fht(ptr, ilog2(n))) { + throw std::runtime_error("FHT did not work properly"); + } + return input; +} + +#define UNUSED(x) (void)(x) +py::array npfht(py::array input) { + py::buffer_info bi = input.request(); + if(bi.format.front() == 'f') { + py::array_t farr(input); + return npfht_oop(farr); + } else { + py::array_t farr(input); + return npfht_oop(farr); + } +} +py::array npfht_(py::array input) { + py::buffer_info bi = input.request(); + if(bi.format.front() == 'f') { + py::array_t farr(input); + return npfht_ip(farr); + } else { + py::array_t farr(input); + return npfht_ip(farr); + } +} + + + + +static char module_docstring[] = + "A C extension that computes the Fast Hadamard Transform"; +static char fht_docstring[] = + "Compute the Fast Hadamard Transform (FHT) for a given " + "one-dimensional NumPy array.\n\n" + "The Hadamard Transform is a linear orthogonal map defined on real vectors " + "whose length is a _power of two_. For the precise definition, see the " + "[Wikipedia entry](https://en.wikipedia.org/wiki/Hadamard_transform). The " + "Hadamard Transform has been recently used a lot in various machine " + "learning " + "and numerical algorithms.\n\n" + "The implementation uses " + "[AVX](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) " + "to speed up the computation. If AVX is not supported on your machine, " + "a simpler implementation without (explicit) vectorization is used.\n\n" + "The function takes two parameters:\n\n" + "* `buffer` is a NumPy array which is being transformed. It must be a " + "one-dimensional array with `dtype` equal to `float32` or `float64` (the " + "former is recommended unless you need high accuracy) and of size being a " + "power " + "of two. If your CPU supports AVX, then `buffer` must be aligned to 32 " + "bytes. " + "To allocate such an aligned buffer, use the function `created_aligned` " + "from this " + "module.\n" + "* `chunk` is a positive integer that controls when the implementation " + "switches " + "from recursive to iterative algorithm. The overall algorithm is " + "recursive, but as " + "soon as the vector becomes no longer than `chunk`, the iterative " + "algorithm is " + "invoked. For technical reasons, `chunk` must be at least 8. A good choice " + "is to " + "set `chunk` to 1024. But to fine-tune the performance one should use a " + "program " + "`best_chunk` supplied with the library.\n"; + +PYBIND11_MODULE(ffht, m) { + m.doc() = module_docstring; + m.def("fht", npfht, py::arg("input"), fht_docstring); + m.def("fht_", npfht_, py::arg("input"), "In-place version of ffht.fht. See ffht.fht documentation for details"); +} diff --git a/pybind11 b/pybind11 new file mode 160000 index 0000000..59aa998 --- /dev/null +++ b/pybind11 @@ -0,0 +1 @@ +Subproject commit 59aa99860c60bd171b9565e9920f125fdb749267 diff --git a/setup.py b/setup.py index f4841cb..9b9058a 100644 --- a/setup.py +++ b/setup.py @@ -18,19 +18,13 @@ sys.stderr.write('NumPy not found!\n') raise -if sys.version_info[0] == 2: - arr_sources = ['_ffht_2.c', 'fht.c'] - -if sys.version_info[0] == 3: - arr_sources = ['_ffht_3.c', 'fht.c'] - module = Extension('ffht', - sources= arr_sources, + sources=["fht-pybind11.cpp", "fht.c", "fast_copy.c"], extra_compile_args=['-march=native', '-O3', '-Wall', '-Wextra', '-pedantic', '-Wshadow', '-Wpointer-arith', '-Wcast-qual', '-Wstrict-prototypes', '-Wmissing-prototypes', - '-std=c99', '-DFHT_HEADER_ONLY'], - include_dirs=[np.get_include()]) + '-std=c++11'], + include_dirs=[np.get_include()] + ["pybind11/include"]) setup(name='FFHT', version='1.1', From 23738856026c2869109250ec7667002fa1d8810b Mon Sep 17 00:00:00 2001 From: "Daniel N. Baker" Date: Wed, 8 Dec 2021 11:57:21 -0500 Subject: [PATCH 2/2] Add 2D support to FFHT, along with parallelization across rows. Adds OpenMP compilation requirement. --- fht-pybind11.cpp | 73 ++++++++++++++++++++++++++++++++++++++++++++---- setup.py | 2 +- 2 files changed, 69 insertions(+), 6 deletions(-) diff --git a/fht-pybind11.cpp b/fht-pybind11.cpp index 3380b53..8b93100 100644 --- a/fht-pybind11.cpp +++ b/fht-pybind11.cpp @@ -28,6 +28,60 @@ py::array_t npfht_oop(py::array_t input) { return ret; } +template +py::array_t npfht2d_oop(py::array_t input, int nthreads=1) { + auto bi = input.request(); + if(bi.ndim != 2) throw std::invalid_argument("input must be 2-d array"); + const py::ssize_t n = bi.shape[1]; + if(!n || (n & (n - 1)) != 0u) { + throw std::invalid_argument("Input must be power-of-2 dimension"); + } + const py::ssize_t nrows = bi.shape[0]; + if(sizeof(T) != bi.itemsize) { + std::fprintf(stderr, "Error: Corrupted NumPy array (itemsize does not match data type).\n"); + throw std::runtime_error("Error: Corrupted NumPy array (itemsize does not match data type)."); + } + py::array_t ret(std::vector{nrows, n}); + py::buffer_info rbi = ret.request(); + std::vector rvs(nrows); + const int il2 = ilog2(n); + #pragma omp parallel for num_threads(nthreads) + for(size_t i = 0; i < nrows; ++i) { + rvs[i] = ::fht((T *)bi.ptr + i * n, (T *)rbi.ptr + i * n, il2); + } + if(std::any_of(rvs.begin(), rvs.end(), [](int8_t x) {return x != 0;})) { + throw std::runtime_error("FHT did not work properly"); + } + return ret; +} + +template +py::array_t npfht2d_ip(py::array_t input, int nthreads=1) { + auto bi = input.request(); + if(bi.ndim != 2) throw std::invalid_argument("input must be 2-d array"); + const py::ssize_t n = bi.shape[1]; + if(!n || (n & (n - 1)) != 0u) { + throw std::invalid_argument("Input must be power-of-2 dimension"); + } + const py::ssize_t nrows = bi.shape[0]; + if(sizeof(T) != bi.itemsize) { + std::fprintf(stderr, "Error: Corrupted NumPy array (itemsize does not match data type).\n"); + throw std::runtime_error("Error: Corrupted NumPy array (itemsize does not match data type)."); + } + std::vector rvs(nrows); + const int il2 = ilog2(n); + #pragma omp parallel for num_threads(nthreads) + for(size_t i = 0; i < nrows; ++i) { + rvs[i] = ::fht((T *)bi.ptr + i * n, (T *)bi.ptr + i * n, il2); + } + if(std::any_of(rvs.begin(), rvs.end(), [](int8_t x) {return x != 0;})) { + throw std::runtime_error("FHT did not work properly"); + } + return input; +} + + + template py::array_t npfht_ip(py::array_t input) { auto bi = input.request(); @@ -47,24 +101,33 @@ py::array_t npfht_ip(py::array_t input) { return input; } -#define UNUSED(x) (void)(x) -py::array npfht(py::array input) { +py::array npfht(py::array input, py::ssize_t nthreads=1) { py::buffer_info bi = input.request(); + if(bi.ndim > 2) throw std::runtime_error("ndim > 2. Only accepted: 1d and 2d numpy arrays"); if(bi.format.front() == 'f') { py::array_t farr(input); + if(bi.ndim == 2) + return npfht2d_oop(farr, nthreads); return npfht_oop(farr); } else { py::array_t farr(input); + if(bi.ndim == 2) + return npfht2d_oop(farr, nthreads); return npfht_oop(farr); } } -py::array npfht_(py::array input) { + +py::array npfht_(py::array input, py::ssize_t nthreads=1) { py::buffer_info bi = input.request(); if(bi.format.front() == 'f') { py::array_t farr(input); + if(bi.ndim == 2) + return npfht2d_ip(farr, nthreads); return npfht_ip(farr); } else { py::array_t farr(input); + if(bi.ndim == 2) + return npfht2d_ip(farr, nthreads); return npfht_ip(farr); } } @@ -111,6 +174,6 @@ static char fht_docstring[] = PYBIND11_MODULE(ffht, m) { m.doc() = module_docstring; - m.def("fht", npfht, py::arg("input"), fht_docstring); - m.def("fht_", npfht_, py::arg("input"), "In-place version of ffht.fht. See ffht.fht documentation for details"); + m.def("fht", npfht, py::arg("input"), py::arg("nthreads") = 1, fht_docstring); + m.def("fht_", npfht_, py::arg("input"), py::arg("nthreads") = 1, "In-place version of ffht.fht. See ffht.fht documentation for details"); } diff --git a/setup.py b/setup.py index 9b9058a..b7c9043 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ extra_compile_args=['-march=native', '-O3', '-Wall', '-Wextra', '-pedantic', '-Wshadow', '-Wpointer-arith', '-Wcast-qual', '-Wstrict-prototypes', '-Wmissing-prototypes', - '-std=c++11'], + '-std=c++11', '-fopenmp'], include_dirs=[np.get_include()] + ["pybind11/include"]) setup(name='FFHT',