From a631d5a14a5a31971d9142c073087b8d1996361d Mon Sep 17 00:00:00 2001
From: Mohammadreza <mzf0069@auburn.edu>
Date: Mon, 26 Jan 2026 16:28:45 -0600
Subject: [PATCH 01/13] Add header for Chebyshev anisotropic pair potential

---
 src/ChebyshevTensorAnisotropicPairPotential.h | 130 ++++++++++++++++++
 1 file changed, 130 insertions(+)
 create mode 100644 src/ChebyshevTensorAnisotropicPairPotential.h
diff --git a/src/ChebyshevTensorAnisotropicPairPotential.h b/src/ChebyshevTensorAnisotropicPairPotential.h
new file mode 100644
index 0000000..626771d
--- /dev/null
+++ b/src/ChebyshevTensorAnisotropicPairPotential.h
@@ -0,0 +1,130 @@
+// Copyright (c) 2018-2020, Michael P. Howard
+// Copyright (c) 2021-2025, Auburn University
+// Part of azplugins, released under the BSD 3-Clause License.
+
+/*!
+ * \file ChebyshevTensorAnisotropicPairPotential.h
+ * \brief Declaration of ChebyshevTensorAnisotropicPairPotential
+ */
+
+#ifndef AZPLUGINS_CHEBYSHEV_TENSOR_ANISO_PAIR_POTENTIAL_H_
+#define AZPLUGINS_CHEBYSHEV_TENSOR_ANISO_PAIR_POTENTIAL_H_
+
+#ifdef NVCC
+#error This header cannot be compiled by nvcc
+#endif
+
+#include <memory>
+#include <pybind11/pybind11.h>
+#include <vector>
+
+#include "hoomd/ForceCompute.h"
+#include "hoomd/GPUArray.h"
+#include "hoomd/HOOMDMath.h"
+#include "hoomd/Index1D.h"
+
+#include "hoomd/md/NeighborList.h"
+
+namespace hoomd
+    {
+namespace azplugins
+    {
+
+class R0Interpolator;
+
+class PYBIND11_EXPORT ChebyshevTensorAnisotropicPairPotential : public ForceCompute
+    {
+    public:
+    //! Constructor
+    ChebyshevTensorAnisotropicPairPotential(std::shared_ptr<SystemDefinition> sysdef,
+                                            std::shared_ptr<hoomd::md::NeighborList> nlist);
+
+    //! Destructor
+    virtual ~ChebyshevTensorAnisotropicPairPotential();
+
+    // Getters
+    std::shared_ptr<hoomd::md::NeighborList> getNeighborList() const
+        {
+        return m_nlist;
+        }
+
+    /// 6x2 domain: stored as 6 entries of Scalar2 = (min,max)
+    const GPUArray<Scalar2>& getApproximationDomain() const
+        {
+        return m_domain;
+        }
+
+    /// Term degrees (Nterms x 6)
+    const GPUArray<unsigned int>& getChebyshevTermList() const
+        {
+        return m_terms;
+        }
+
+    /// Coefficients (length = Nterms)
+    const GPUArray<Scalar>& getCoefficients() const
+        {
+        return m_coeffs;
+        }
+
+    /// Max degree per dimension length (length = 6)
+    const GPUArray<unsigned int>& getMaxDegreePerDim() const
+        {
+        return m_max_degree;
+        }
+
+    /// Number of terms (int)
+    unsigned int getNTerms() const
+        {
+        return m_Nterms;
+        }
+
+    /// R0 interpolator object
+    std::shared_ptr<R0Interpolator> getR0Interpolator() const
+        {
+        return m_r0_interp;
+        }
+
+    /// Allocate storage for term list and coefficients.
+    void resizeTerms(unsigned int Nterms);
+
+    void setR0Interpolator(std::shared_ptr<R0Interpolator> interp)
+        {
+        m_r0_interp = interp;
+        }
+
+    protected:
+    void computeForces(uint64_t timestep) override;
+
+    private:
+    // 1) neighbor list object
+    std::shared_ptr<hoomd::md::NeighborList> m_nlist;
+
+    // 2) approximation domain (6x2): 6 rows, each is (min,max)
+    GPUArray<Scalar2> m_domain;
+
+    // 3) r0 interpolation object
+    std::shared_ptr<R0Interpolator> m_r0_interp;
+
+    // 4) Chebyshev term list (Nterms x 6)
+    GPUArray<unsigned int> m_terms;
+
+    // 5) coeffs (Nterms)
+    GPUArray<Scalar> m_coeffs;
+
+    // 6) max degree per dimension (6)
+    GPUArray<unsigned int> m_max_degree;
+
+    // 7) number of terms
+    unsigned int m_Nterms;
+    };
+
+namespace detail
+    {
+///! exports to Python
+void export_ChebyshevTensorAnisotropicPairPotential(pybind11::module& m);
+    } // end namespace detail
+
+    } // end namespace azplugins
+    } // end namespace hoomd
+
+#endif // AZPLUGINS_CHEBYSHEV_TENSOR_ANISO_PAIR_POTENTIAL_H_

From c0323f1a22c011610a9c8e8d347e6bc96a929e36 Mon Sep 17 00:00:00 2001
From: Mohammadreza <mzf0069@auburn.edu>
Date: Thu, 5 Feb 2026 16:58:58 -0600
Subject: [PATCH 02/13] Update header interface

---
 src/ChebyshevTensorAnisotropicPairPotential.h | 55 ++++++++-----------
 1 file changed, 24 insertions(+), 31 deletions(-)

diff --git a/src/ChebyshevTensorAnisotropicPairPotential.h b/src/ChebyshevTensorAnisotropicPairPotential.h
index 626771d..7d2374f 100644
--- a/src/ChebyshevTensorAnisotropicPairPotential.h
+++ b/src/ChebyshevTensorAnisotropicPairPotential.h
@@ -7,13 +7,14 @@
  * \brief Declaration of ChebyshevTensorAnisotropicPairPotential
  */
 
-#ifndef AZPLUGINS_CHEBYSHEV_TENSOR_ANISO_PAIR_POTENTIAL_H_
-#define AZPLUGINS_CHEBYSHEV_TENSOR_ANISO_PAIR_POTENTIAL_H_
+#ifndef AZPLUGINS_CHEBYSHEV_TENSOR_ANISOTROPIC_PAIR_POTENTIAL_H_
+#define AZPLUGINS_CHEBYSHEV_TENSOR_ANISOTROPIC_PAIR_POTENTIAL_H_
 
 #ifdef NVCC
 #error This header cannot be compiled by nvcc
 #endif
 
+#include <array>
 #include <memory>
 #include <pybind11/pybind11.h>
 #include <vector>
@@ -30,14 +31,18 @@ namespace hoomd
 namespace azplugins
     {
 
-class R0Interpolator;
+class LinearInterpolator5D;
 
 class PYBIND11_EXPORT ChebyshevTensorAnisotropicPairPotential : public ForceCompute
     {
     public:
     //! Constructor
     ChebyshevTensorAnisotropicPairPotential(std::shared_ptr<SystemDefinition> sysdef,
-                                            std::shared_ptr<hoomd::md::NeighborList> nlist);
+                                            std::shared_ptr<hoomd::md::NeighborList> nlist,
+                                            const std::array<Scalar2, 6>& domain,
+                                            const std::vector<Scalar>& r0_data,
+                                            const std::vector<unsigned int>& terms,
+                                            const std::vector<Scalar>& coeffs);
 
     //! Destructor
     virtual ~ChebyshevTensorAnisotropicPairPotential();
@@ -54,6 +59,12 @@ class PYBIND11_EXPORT ChebyshevTensorAnisotropicPairPotential : public ForceComp
         return m_domain;
         }
 
+    /// r0 data (theta, phi, alpha, beta, gamma) (N x 6)
+    const GPUArray<Scalar>& getR0Data() const
+        {
+        return m_r0_data;
+        }
+
     /// Term degrees (Nterms x 6)
     const GPUArray<unsigned int>& getChebyshevTermList() const
         {
@@ -66,32 +77,14 @@ class PYBIND11_EXPORT ChebyshevTensorAnisotropicPairPotential : public ForceComp
         return m_coeffs;
         }
 
-    /// Max degree per dimension length (length = 6)
-    const GPUArray<unsigned int>& getMaxDegreePerDim() const
-        {
-        return m_max_degree;
-        }
-
-    /// Number of terms (int)
     unsigned int getNTerms() const
         {
         return m_Nterms;
         }
 
-    /// R0 interpolator object
-    std::shared_ptr<R0Interpolator> getR0Interpolator() const
-        {
-        return m_r0_interp;
-        }
-
     /// Allocate storage for term list and coefficients.
     void resizeTerms(unsigned int Nterms);
 
-    void setR0Interpolator(std::shared_ptr<R0Interpolator> interp)
-        {
-        m_r0_interp = interp;
-        }
-
     protected:
     void computeForces(uint64_t timestep) override;
 
@@ -102,20 +95,20 @@ class PYBIND11_EXPORT ChebyshevTensorAnisotropicPairPotential : public ForceComp
     // 2) approximation domain (6x2): 6 rows, each is (min,max)
     GPUArray<Scalar2> m_domain;
 
-    // 3) r0 interpolation object
-    std::shared_ptr<R0Interpolator> m_r0_interp;
+    // 3) intenal r0 linear interpolator
+    std::unique_ptr<LinearInterpolator5D> m_r0_interp;
+
+    // 4) r0_data
+    GPUArray<Scalar> m_r0_data;
 
-    // 4) Chebyshev term list (Nterms x 6)
+    // 5) Chebyshev term list (Nterms x 6)
     GPUArray<unsigned int> m_terms;
 
-    // 5) coeffs (Nterms)
+    // 6) coeffs (Nterms)
     GPUArray<Scalar> m_coeffs;
 
-    // 6) max degree per dimension (6)
-    GPUArray<unsigned int> m_max_degree;
-
     // 7) number of terms
-    unsigned int m_Nterms;
+    unsigned int m_Nterms = 0;
     };
 
 namespace detail
@@ -127,4 +120,4 @@ void export_ChebyshevTensorAnisotropicPairPotential(pybind11::module& m);
     } // end namespace azplugins
     } // end namespace hoomd
 
-#endif // AZPLUGINS_CHEBYSHEV_TENSOR_ANISO_PAIR_POTENTIAL_H_
+#endif // AZPLUGINS_CHEBYSHEV_TENSOR_ANISOTROPIC_PAIR_POTENTIAL_H_

From 189f25f2d8fdeff994720bc45605aba81a201fb7 Mon Sep 17 00:00:00 2001
From: Mohammadreza <mzf0069@auburn.edu>
Date: Mon, 16 Feb 2026 09:17:05 -0600
Subject: [PATCH 03/13] Update header interface

---
 src/ChebyshevAnisotropicPairPotential.h       | 100 ++++++++++++++
 src/ChebyshevTensorAnisotropicPairPotential.h | 123 ------------------
 2 files changed, 100 insertions(+), 123 deletions(-)
 create mode 100644 src/ChebyshevAnisotropicPairPotential.h
 delete mode 100644 src/ChebyshevTensorAnisotropicPairPotential.h

diff --git a/src/ChebyshevAnisotropicPairPotential.h b/src/ChebyshevAnisotropicPairPotential.h
new file mode 100644
index 0000000..dd14b25
--- /dev/null
+++ b/src/ChebyshevAnisotropicPairPotential.h
@@ -0,0 +1,100 @@
+// Copyright (c) 2018-2020, Michael P. Howard
+// Copyright (c) 2021-2025, Auburn University
+// Part of azplugins, released under the BSD 3-Clause License.
+
+/*!
+ * \file ChebyshevAnisotropicPairPotential.h
+ * \brief Declaration of ChebyshevAnisotropicPairPotential
+ */
+
+#ifndef AZPLUGINS_CHEBYSHEV_ANISOTROPIC_PAIR_POTENTIAL_H_
+#define AZPLUGINS_CHEBYSHEV_ANISOTROPIC_PAIR_POTENTIAL_H_
+
+#ifdef NVCC
+#error This header cannot be compiled by nvcc
+#endif
+
+#include <array>
+#include <memory>
+#include <pybind11/pybind11.h>
+#include <vector>
+
+#include "hoomd/ForceCompute.h"
+#include "hoomd/GPUArray.h"
+#include "hoomd/HOOMDMath.h"
+#include "hoomd/Index1D.h"
+
+#include "hoomd/md/NeighborList.h"
+
+namespace hoomd
+    {
+namespace azplugins
+    {
+
+class LinearInterpolator5D;
+
+class PYBIND11_EXPORT ChebyshevAnisotropicPairPotential : public ForceCompute
+    {
+    public:
+    //! Constructor
+    ChebyshevAnisotropicPairPotential(std::shared_ptr<SystemDefinition> sysdef,
+                                      std::shared_ptr<hoomd::md::NeighborList> nlist,
+                                      const Scalar* domain,
+                                      const Scalar* r0_data,
+                                      const unsigned int* r0_shape,
+                                      unsigned int Nterms,
+                                      const unsigned int* terms,
+                                      const Scalar* coeffs);
+
+    //! Destructor
+    virtual ~ChebyshevAnisotropicPairPotential();
+
+    // Getters
+    std::shared_ptr<hoomd::md::NeighborList> getNeighborList() const
+        {
+        return m_nlist;
+        }
+
+    /// 6x2 domain: stored as 6 entries of Scalar2 = (min,max)
+    const GPUArray<Scalar2>& getApproximationDomain() const
+        {
+        return m_domain;
+        }
+
+    protected:
+    void computeForces(uint64_t timestep) override;
+
+    // neighbor list object
+    std::shared_ptr<hoomd::md::NeighborList> m_nlist;
+
+    // approximation domain (6x2): 6 rows, each is (min,max)
+    GPUArray<Scalar2> m_domain;
+
+    // intenal r0 linear interpolator
+    std::unique_ptr<LinearInterpolator5D> m_r0_interp;
+
+    // r0_data
+    GPUArray<Scalar> m_r0_data;
+
+    std::array<unsigned int, 5> m_r0_shape;
+
+    // Chebyshev term list (Nterms x 6)
+    GPUArray<unsigned int> m_terms;
+
+    // coeffs (Nterms)
+    GPUArray<Scalar> m_coeffs;
+
+    // number of terms
+    unsigned int m_Nterms = 0;
+    };
+
+namespace detail
+    {
+///! exports to Python
+void export_ChebyshevAnisotropicPairPotential(pybind11::module& m);
+    } // end namespace detail
+
+    } // end namespace azplugins
+    } // end namespace hoomd
+
+#endif // AZPLUGINS_CHEBYSHEV_ANISOTROPIC_PAIR_POTENTIAL_H_
diff --git a/src/ChebyshevTensorAnisotropicPairPotential.h b/src/ChebyshevTensorAnisotropicPairPotential.h
deleted file mode 100644
index 7d2374f..0000000
--- a/src/ChebyshevTensorAnisotropicPairPotential.h
+++ /dev/null
@@ -1,123 +0,0 @@
-// Copyright (c) 2018-2020, Michael P. Howard
-// Copyright (c) 2021-2025, Auburn University
-// Part of azplugins, released under the BSD 3-Clause License.
-
-/*!
- * \file ChebyshevTensorAnisotropicPairPotential.h
- * \brief Declaration of ChebyshevTensorAnisotropicPairPotential
- */
-
-#ifndef AZPLUGINS_CHEBYSHEV_TENSOR_ANISOTROPIC_PAIR_POTENTIAL_H_
-#define AZPLUGINS_CHEBYSHEV_TENSOR_ANISOTROPIC_PAIR_POTENTIAL_H_
-
-#ifdef NVCC
-#error This header cannot be compiled by nvcc
-#endif
-
-#include <array>
-#include <memory>
-#include <pybind11/pybind11.h>
-#include <vector>
-
-#include "hoomd/ForceCompute.h"
-#include "hoomd/GPUArray.h"
-#include "hoomd/HOOMDMath.h"
-#include "hoomd/Index1D.h"
-
-#include "hoomd/md/NeighborList.h"
-
-namespace hoomd
-    {
-namespace azplugins
-    {
-
-class LinearInterpolator5D;
-
-class PYBIND11_EXPORT ChebyshevTensorAnisotropicPairPotential : public ForceCompute
-    {
-    public:
-    //! Constructor
-    ChebyshevTensorAnisotropicPairPotential(std::shared_ptr<SystemDefinition> sysdef,
-                                            std::shared_ptr<hoomd::md::NeighborList> nlist,
-                                            const std::array<Scalar2, 6>& domain,
-                                            const std::vector<Scalar>& r0_data,
-                                            const std::vector<unsigned int>& terms,
-                                            const std::vector<Scalar>& coeffs);
-
-    //! Destructor
-    virtual ~ChebyshevTensorAnisotropicPairPotential();
-
-    // Getters
-    std::shared_ptr<hoomd::md::NeighborList> getNeighborList() const
-        {
-        return m_nlist;
-        }
-
-    /// 6x2 domain: stored as 6 entries of Scalar2 = (min,max)
-    const GPUArray<Scalar2>& getApproximationDomain() const
-        {
-        return m_domain;
-        }
-
-    /// r0 data (theta, phi, alpha, beta, gamma) (N x 6)
-    const GPUArray<Scalar>& getR0Data() const
-        {
-        return m_r0_data;
-        }
-
-    /// Term degrees (Nterms x 6)
-    const GPUArray<unsigned int>& getChebyshevTermList() const
-        {
-        return m_terms;
-        }
-
-    /// Coefficients (length = Nterms)
-    const GPUArray<Scalar>& getCoefficients() const
-        {
-        return m_coeffs;
-        }
-
-    unsigned int getNTerms() const
-        {
-        return m_Nterms;
-        }
-
-    /// Allocate storage for term list and coefficients.
-    void resizeTerms(unsigned int Nterms);
-
-    protected:
-    void computeForces(uint64_t timestep) override;
-
-    private:
-    // 1) neighbor list object
-    std::shared_ptr<hoomd::md::NeighborList> m_nlist;
-
-    // 2) approximation domain (6x2): 6 rows, each is (min,max)
-    GPUArray<Scalar2> m_domain;
-
-    // 3) intenal r0 linear interpolator
-    std::unique_ptr<LinearInterpolator5D> m_r0_interp;
-
-    // 4) r0_data
-    GPUArray<Scalar> m_r0_data;
-
-    // 5) Chebyshev term list (Nterms x 6)
-    GPUArray<unsigned int> m_terms;
-
-    // 6) coeffs (Nterms)
-    GPUArray<Scalar> m_coeffs;
-
-    // 7) number of terms
-    unsigned int m_Nterms = 0;
-    };
-
-namespace detail
-    {
-///! exports to Python
-void export_ChebyshevTensorAnisotropicPairPotential(pybind11::module& m);
-    } // end namespace detail
-
-    } // end namespace azplugins
-    } // end namespace hoomd
-
-#endif // AZPLUGINS_CHEBYSHEV_TENSOR_ANISOTROPIC_PAIR_POTENTIAL_H_

From 67e1bdc95acaad92363221be426a1d69c9b004f1 Mon Sep 17 00:00:00 2001
From: Mohammadreza <mzf0069@auburn.edu>
Date: Fri, 27 Feb 2026 10:02:57 -0600
Subject: [PATCH 04/13] Update Chebyshev anisotropic pair potential and add
 linear interpolant

---
 src/CMakeLists.txt                       |   1 +
 src/ChebyshevAnisotropicPairPotential.cc | 129 ++++++++++++++++
 src/ChebyshevAnisotropicPairPotential.h  |  48 +++---
 src/LinearInterpolator5D.h               | 186 +++++++++++++++++++++++
 src/module.cc                            |   2 +
 src/pair.py                              |  56 +++++++
 src/pytest/CMakeLists.txt                |   1 +
 src/pytest/test_chebyshev.py             |  71 +++++++++
 8 files changed, 473 insertions(+), 21 deletions(-)
 create mode 100644 src/ChebyshevAnisotropicPairPotential.cc
 create mode 100644 src/LinearInterpolator5D.h
 create mode 100644 src/pytest/test_chebyshev.py

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a5529d9..304b83f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,6 +3,7 @@ set(COMPONENT_NAME azplugins)
 
 # TODO: List all host C++ source code files in _${COMPONENT_NAME}_sources.
 set(_${COMPONENT_NAME}_sources
+    ChebyshevAnisotropicPairPotential.cc
     ConstantFlow.cc
     export_ImagePotentialBondHarmonic.cc
     module.cc
diff --git a/src/ChebyshevAnisotropicPairPotential.cc b/src/ChebyshevAnisotropicPairPotential.cc
new file mode 100644
index 0000000..b77ae2f
--- /dev/null
+++ b/src/ChebyshevAnisotropicPairPotential.cc
@@ -0,0 +1,129 @@
+// Copyright (c) 2018-2020, Michael P. Howard
+// Copyright (c) 2021-2025, Auburn University
+// Part of azplugins, released under the BSD 3-Clause License.
+
+/*!
+ * \file ChebyshevAnisotropicPairPotential.cc
+ * \brief Definition of ChebyshevAnisotropicPairPotential
+ */
+
+#include "ChebyshevAnisotropicPairPotential.h"
+#include "LinearInterpolator5D.h"
+
+namespace hoomd
+    {
+namespace azplugins
+    {
+
+ChebyshevAnisotropicPairPotential::ChebyshevAnisotropicPairPotential(
+    std::shared_ptr<SystemDefinition> sysdef,
+    std::shared_ptr<hoomd::md::NeighborList> nlist,
+    const Scalar* domain,
+    const float r_cut,
+    const unsigned int* terms,
+    const Scalar* coeffs,
+    unsigned int Nterms,
+    const Scalar* r0_data,
+    const unsigned int* r0_shape)
+    : ForceCompute(sysdef), m_nlist(nlist), m_r_cut(r_cut), m_Nterms(Nterms)
+    {
+    }
+
+ChebyshevAnisotropicPairPotential::~ChebyshevAnisotropicPairPotential() { }
+
+void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
+    {
+    if (m_nlist)
+        {
+        m_nlist->compute(timestep);
+        }
+
+    const unsigned int N = m_pdata->getN();
+
+    ArrayHandle<Scalar4> h_force(m_force, access_location::host, access_mode::readwrite);
+    ArrayHandle<Scalar4> h_torque(m_torque, access_location::host, access_mode::readwrite);
+
+    for (unsigned int i = 0; i < N; ++i)
+        {
+        h_force.data[i] = make_scalar4(Scalar(0), Scalar(0), Scalar(0), Scalar(0));
+        h_torque.data[i] = make_scalar4(Scalar(0), Scalar(0), Scalar(0), Scalar(0));
+        }
+    }
+
+namespace detail
+    {
+
+void export_ChebyshevAnisotropicPairPotential(pybind11::module& m)
+    {
+    namespace py = pybind11;
+    using NL = hoomd::md::NeighborList;
+    // Base class goes in the class_ template arguments; py::base<T>() is deprecated.
+    py::class_<ChebyshevAnisotropicPairPotential,
+               hoomd::ForceCompute,
+               std::shared_ptr<ChebyshevAnisotropicPairPotential>>(
+        m,
+        "ChebyshevAnisotropicPairPotential")
+        .def(py::init(
+            [](std::shared_ptr<SystemDefinition> sysdef,
+               std::shared_ptr<NL> nlist,
+               py::array_t<Scalar, py::array::c_style | py::array::forcecast> domain,
+               float r_cut,
+               py::array_t<unsigned int, py::array::c_style | py::array::forcecast> terms,
+               py::array_t<Scalar, py::array::c_style | py::array::forcecast> coeffs,
+               py::array_t<Scalar, py::array::c_style | py::array::forcecast> r0_data)
+            {
+                // domain must be (5,2) - rho is always in (0, 1)
+                if (domain.ndim() != 2 || domain.shape(0) != 5 || domain.shape(1) != 2)
+                    {
+                    throw std::runtime_error("domain must have shape (5,2).");
+                    }
+
+                // terms must be (Nterms,6): one row of six degrees per Chebyshev term
+                if (terms.ndim() != 2 || terms.shape(1) != 6)
+                    {
+                    throw std::runtime_error("terms must have shape (Nterms,6).");
+                    }
+
+                const unsigned int Nterms = static_cast<unsigned int>(terms.shape(0));
+
+                // coeffs must be (Nterms,): exactly one coefficient per row of terms
+                if (coeffs.ndim() != 1 || static_cast<unsigned int>(coeffs.shape(0)) != Nterms)
+                    {
+                    throw std::runtime_error("coeffs must have shape (Nterms,).");
+                    }
+
+                // r0_data must be 5D
+                if (r0_data.ndim() != 5)
+                    {
+                    throw std::runtime_error("r0_data must be a 5D array.");
+                    }
+
+                // Infer r0_shape from r0_data.shape; every axis needs at least 2 points
+                std::array<unsigned int, 5> r0_shape;
+                for (unsigned int k = 0; k < 5; ++k)
+                    {
+                    const auto dim = r0_data.shape(k);
+                    if (dim < 2)
+                        {
+                        throw std::runtime_error("r0_data has invalid dimension(s).");
+                        }
+                    r0_shape[k] = static_cast<unsigned int>(dim);
+                    }
+
+                return std::make_shared<ChebyshevAnisotropicPairPotential>(sysdef,
+                                                                           nlist,
+                                                                           domain.data(),
+                                                                           r_cut,
+                                                                           terms.data(),
+                                                                           coeffs.data(),
+                                                                           Nterms,
+                                                                           r0_data.data(),
+                                                                           r0_shape.data());
+            }))
+        .def_property_readonly("r_cut", &ChebyshevAnisotropicPairPotential::getRCut)
+        .def_property_readonly("n_terms", &ChebyshevAnisotropicPairPotential::getNTerms);
+    }
+
+    } // end namespace detail
+    } // namespace azplugins
+    } // namespace hoomd
diff --git a/src/ChebyshevAnisotropicPairPotential.h b/src/ChebyshevAnisotropicPairPotential.h
index dd14b25..d054907 100644
--- a/src/ChebyshevAnisotropicPairPotential.h
+++ b/src/ChebyshevAnisotropicPairPotential.h
@@ -31,7 +31,7 @@ namespace hoomd
 namespace azplugins
     {
 
-class LinearInterpolator5D;
+template<typename T> class LinearInterpolator5D;
 
 class PYBIND11_EXPORT ChebyshevAnisotropicPairPotential : public ForceCompute
     {
@@ -40,11 +40,12 @@ class PYBIND11_EXPORT ChebyshevAnisotropicPairPotential : public ForceCompute
     ChebyshevAnisotropicPairPotential(std::shared_ptr<SystemDefinition> sysdef,
                                       std::shared_ptr<hoomd::md::NeighborList> nlist,
                                       const Scalar* domain,
-                                      const Scalar* r0_data,
-                                      const unsigned int* r0_shape,
-                                      unsigned int Nterms,
+                                      const float r_cut,
                                       const unsigned int* terms,
-                                      const Scalar* coeffs);
+                                      const Scalar* coeffs,
+                                      unsigned int Nterms,
+                                      const Scalar* r0_data,
+                                      const unsigned int* r0_shape);
 
     //! Destructor
     virtual ~ChebyshevAnisotropicPairPotential();
@@ -55,37 +56,42 @@ class PYBIND11_EXPORT ChebyshevAnisotropicPairPotential : public ForceCompute
         return m_nlist;
         }
 
-    /// 6x2 domain: stored as 6 entries of Scalar2 = (min,max)
+    /// 5x2 domain: stored as 5 entries of Scalar2 = (min,max)
     const GPUArray<Scalar2>& getApproximationDomain() const
         {
         return m_domain;
         }
 
+    /// Cutoff radius (read-only). Returned by value, so the result carries no const qualifier.
+    float getRCut() const
+        {
+        return m_r_cut;
+        }
+
+    /// Read-only number of Chebyshev terms
+    unsigned int getNTerms() const
+        {
+        return m_Nterms;
+        }
+
     protected:
     void computeForces(uint64_t timestep) override;
 
-    // neighbor list object
-    std::shared_ptr<hoomd::md::NeighborList> m_nlist;
+    std::shared_ptr<hoomd::md::NeighborList> m_nlist; //!< Neighbor list
 
-    // approximation domain (6x2): 6 rows, each is (min,max)
-    GPUArray<Scalar2> m_domain;
+    GPUArray<Scalar2> m_domain; //!< Approximation domain (5x2): 5 rows, each is (min, max)
 
-    // intenal r0 linear interpolator
-    std::unique_ptr<LinearInterpolator5D> m_r0_interp;
+    float m_r_cut; //!< cut-off distance in approximation domain
 
-    // r0_data
-    GPUArray<Scalar> m_r0_data;
+    GPUArray<unsigned int> m_terms; //!< Chebyshev term list (Nterms x 6)
 
-    std::array<unsigned int, 5> m_r0_shape;
+    GPUArray<Scalar> m_coeffs; //!< Coefficients corresponding to each term
 
-    // Chebyshev term list (Nterms x 6)
-    GPUArray<unsigned int> m_terms;
+    unsigned int m_Nterms; //!< Number of terms
 
-    // coeffs (Nterms)
-    GPUArray<Scalar> m_coeffs;
+    GPUArray<Scalar> m_r0_data; //!< R0 data
 
-    // number of terms
-    unsigned int m_Nterms = 0;
+    GPUArray<unsigned int> m_r0_shape; //!< Number of points used along each dimension to sample r0
     };
 
 namespace detail
diff --git a/src/LinearInterpolator5D.h b/src/LinearInterpolator5D.h
new file mode 100644
index 0000000..9e1f292
--- /dev/null
+++ b/src/LinearInterpolator5D.h
@@ -0,0 +1,186 @@
+// Copyright (c) 2018-2020, Michael P. Howard
+// Copyright (c) 2021-2025, Auburn University
+// Part of azplugins, released under the BSD 3-Clause License.
+
+#ifndef AZPLUGINS_LINEAR_INTERPOLATOR_5D_H_
+#define AZPLUGINS_LINEAR_INTERPOLATOR_5D_H_
+
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+
+#include "hoomd/HOOMDMath.h"
+
+#if defined(__HIPCC__) || defined(__CUDACC__)
+#define AZPLUGINS_HOSTDEVICE __host__ __device__
+#define AZPLUGINS_FORCEINLINE __forceinline__
+#else
+#define AZPLUGINS_HOSTDEVICE
+#define AZPLUGINS_FORCEINLINE inline
+#endif
+
+namespace hoomd
+    {
+namespace azplugins
+    {
+
+class FiveDimensionalIndex
+    {
+    public:
+    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE FiveDimensionalIndex()
+        : n0_(0), n1_(0), n2_(0), n3_(0), n4_(0)
+        {
+        }
+
+    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE FiveDimensionalIndex(unsigned int n0,
+                                                                    unsigned int n1,
+                                                                    unsigned int n2,
+                                                                    unsigned int n3,
+                                                                    unsigned int n4)
+        : n0_(n0), n1_(n1), n2_(n2), n3_(n3), n4_(n4)
+        {
+        }
+
+    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE unsigned int operator()(unsigned int i0,
+                                                                       unsigned int i1,
+                                                                       unsigned int i2,
+                                                                       unsigned int i3,
+                                                                       unsigned int i4) const
+        {
+        unsigned int idx = i0;
+        idx = idx * n1_ + i1;
+        idx = idx * n2_ + i2;
+        idx = idx * n3_ + i3;
+        idx = idx * n4_ + i4;
+        return idx;
+        }
+
+    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE unsigned int size() const
+        {
+        return n0_ * n1_ * n2_ * n3_ * n4_;
+        }
+
+    private:
+    unsigned int n0_, n1_, n2_, n3_, n4_;
+    };
+
+// T is the stored data type.
+template<typename T> class LinearInterpolator5D
+    {
+    public:
+    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE LinearInterpolator5D() : data_(nullptr), nindex_()
+        {
+        for (int d = 0; d < 5; ++d)
+            {
+            n_[d] = 0;
+            lo_[d] = Scalar(0);
+            hi_[d] = Scalar(0);
+            dx_[d] = Scalar(0);
+            }
+        }
+
+    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE
+    LinearInterpolator5D(const T* data, const unsigned int* n, const Scalar* lo, const Scalar* hi)
+        : data_(data)
+        {
+        for (int d = 0; d < 5; ++d)
+            {
+            n_[d] = n[d];
+            lo_[d] = lo[d];
+            hi_[d] = hi[d];
+
+            assert(n_[d] >= 2);
+            dx_[d] = (hi_[d] - lo_[d]) / Scalar(n_[d] - 1);
+            }
+
+        nindex_ = FiveDimensionalIndex(n_[0], n_[1], n_[2], n_[3], n_[4]);
+        }
+
+    // Multilinear interpolation at (x0, x1, x2, x3, x4). The cell index is clamped to the
+    // table, so a point on the upper edge, or off the grid by rounding, never reads out of
+    // bounds; outside [lo, hi] the value is extrapolated linearly from the edge cell.
+    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE Scalar
+    operator()(Scalar x0, Scalar x1, Scalar x2, Scalar x3, Scalar x4) const
+        {
+        const Scalar x[5] = {x0, x1, x2, x3, x4};
+
+        Scalar f[5];
+        for (int d = 0; d < 5; ++d)
+            {
+            f[d] = (x[d] - lo_[d]) / dx_[d];
+            }
+
+        int bin[5];
+        Scalar dloc[5];
+        for (int dim = 0; dim < 5; ++dim)
+            {
+            const int last_bin = (int)n_[dim] - 2;
+            bin[dim] = (int)std::floor((double)f[dim]);
+            // Debug-only guard against inputs far outside the grid; being one cell off
+            // (x == hi exactly, or rounding just past either edge) is tolerated and clamped.
+            assert(bin[dim] >= -1 && bin[dim] <= last_bin + 1);
+
+            // Clamp so that the two-point stencil {bin, bin + 1} always stays inside the table.
+            bin[dim] = (bin[dim] < 0) ? 0 : ((bin[dim] > last_bin) ? last_bin : bin[dim]);
+
+            dloc[dim] = f[dim] - Scalar(bin[dim]);
+            }
+
+        const Scalar xd = dloc[0];
+        const Scalar yd = dloc[1];
+        const Scalar zd = dloc[2];
+        const Scalar wd = dloc[3];
+        const Scalar vd = dloc[4];
+
+        Scalar c[32];
+        for (unsigned int mask = 0; mask < 32; ++mask)
+            {
+            const unsigned int i0 = (unsigned int)(bin[0] + ((mask >> 0) & 1u));
+            const unsigned int i1 = (unsigned int)(bin[1] + ((mask >> 1) & 1u));
+            const unsigned int i2 = (unsigned int)(bin[2] + ((mask >> 2) & 1u));
+            const unsigned int i3 = (unsigned int)(bin[3] + ((mask >> 3) & 1u));
+            const unsigned int i4 = (unsigned int)(bin[4] + ((mask >> 4) & 1u));
+
+            c[mask] = Scalar(data_[nindex_(i0, i1, i2, i3, i4)]);
+            }
+
+        Scalar c0[16];
+        for (unsigned int i = 0; i < 16; ++i)
+            {
+            c0[i] = c[2 * i] * (Scalar(1) - xd) + c[2 * i + 1] * xd;
+            }
+
+        Scalar c1[8];
+        for (unsigned int i = 0; i < 8; ++i)
+            {
+            c1[i] = c0[2 * i] * (Scalar(1) - yd) + c0[2 * i + 1] * yd;
+            }
+
+        Scalar c2[4];
+        for (unsigned int i = 0; i < 4; ++i)
+            {
+            c2[i] = c1[2 * i] * (Scalar(1) - zd) + c1[2 * i + 1] * zd;
+            }
+
+        Scalar c3[2];
+        for (unsigned int i = 0; i < 2; ++i)
+            {
+            c3[i] = c2[2 * i] * (Scalar(1) - wd) + c2[2 * i + 1] * wd;
+            }
+
+        return c3[0] * (Scalar(1) - vd) + c3[1] * vd;
+        }
+
+    private:
+    const T* data_;
+    unsigned int n_[5];
+    Scalar lo_[5];
+    Scalar hi_[5];
+    Scalar dx_[5];
+    FiveDimensionalIndex nindex_;
+    };
+
+    } // namespace azplugins
+    } // namespace hoomd
+
+#endif // AZPLUGINS_LINEAR_INTERPOLATOR_5D_H_
diff --git a/src/module.cc b/src/module.cc
index 1ab81c7..d22f6dd 100644
--- a/src/module.cc
+++ b/src/module.cc
@@ -69,6 +69,7 @@ void export_ParabolicFlow(pybind11::module&);
 
 // pair
 void export_AnisoPotentialPairTwoPatchMorse(pybind11::module&);
+void export_ChebyshevAnisotropicPairPotential(pybind11::module&);
 void export_PotentialPairColloid(pybind11::module&);
 void export_PotentialPairExpandedYukawa(pybind11::module&);
 void export_PotentialPairHertz(pybind11::module&);
@@ -141,6 +142,7 @@ PYBIND11_MODULE(_azplugins, m)
 
     // pair
     export_AnisoPotentialPairTwoPatchMorse(m);
+    export_ChebyshevAnisotropicPairPotential(m);
     export_PotentialPairColloid(m);
     export_PotentialPairExpandedYukawa(m);
     export_PotentialPairHertz(m);
diff --git a/src/pair.py b/src/pair.py
index fbefb07..8e982ae 100644
--- a/src/pair.py
+++ b/src/pair.py
@@ -4,13 +4,69 @@
 
 """Pair potentials."""
 
+import numpy
 from hoomd.azplugins import _azplugins
 from hoomd.data.parameterdicts import ParameterDict, TypeParameterDict
 from hoomd.data.typeparam import TypeParameter
 from hoomd.md import pair
+from hoomd.md.force import Force
 from hoomd.variant import Variant
 
 
+class ChebyshevAnisotropicPairPotential(Force):
+    """Chebyshev anisotropic pair potential."""
+
+    def __init__(self, nlist, domain, terms, coeffs, r0_data, r_cut=3.0):
+        super().__init__()
+        self._nlist = nlist
+
+        self._domain = numpy.asarray(domain)
+        self._r_cut = float(r_cut)
+        self._terms = numpy.asarray(terms, dtype=numpy.uint32)
+        self._coeffs = numpy.asarray(coeffs)
+        self._r0_data = numpy.asarray(r0_data)
+
+        if self._domain.shape != (5, 2):
+            raise ValueError("domain must have shape (5, 2).")
+        if self._terms.ndim != 2 or self._terms.shape[1] != 6:
+            raise ValueError("terms must have shape (Nterms, 6).")
+        nterms = int(self._terms.shape[0])
+        if self._coeffs.ndim != 1 or int(self._coeffs.shape[0]) != nterms:
+            raise ValueError("coeffs must have shape (Nterms,).")
+        if self._r0_data.ndim != 5:
+            raise ValueError("r0_data must be a 5D array.")
+
+    @property
+    def r_cut(self):
+        """Cut-off distance in approximation domain"""
+        return self._r_cut
+
+    @property
+    def n_terms(self):
+        """Number of terms."""
+        return int(self._terms.shape[0])
+
+    @property
+    def r0_shape(self):
+        """r0 table shape."""
+        return tuple(int(x) for x in self._r0_data.shape)
+
+    def _attach_hook(self):
+        self._nlist._attach(self._simulation)
+
+        self._cpp_obj = _azplugins.ChebyshevAnisotropicPairPotential(
+            self._simulation.state._cpp_sys_def,
+            self._nlist._cpp_obj,
+            self._domain,
+            self._r_cut,
+            self._terms,
+            self._coeffs,
+            self._r0_data,
+        )
+
+        super()._attach_hook()
+
+
 class Colloid(pair.Pair):
     r"""Colloid pair potential.
 
diff --git a/src/pytest/CMakeLists.txt b/src/pytest/CMakeLists.txt
index 157b326..4676d3a 100644
--- a/src/pytest/CMakeLists.txt
+++ b/src/pytest/CMakeLists.txt
@@ -3,6 +3,7 @@ set(test_files
     __init__.py
     test_bond.py
     test_compute.py
+    test_chebyshev.py
     test_external.py
     test_flow.py
     test_pair.py
diff --git a/src/pytest/test_chebyshev.py b/src/pytest/test_chebyshev.py
new file mode 100644
index 0000000..c0b16cc
--- /dev/null
+++ b/src/pytest/test_chebyshev.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2018-2020, Michael P. Howard
+# Copyright (c) 2021-2025, Auburn University
+# Part of azplugins, released under the BSD 3-Clause License.
+
+import numpy
+import hoomd
+import hoomd.azplugins
+
+
+def test_attach_and_zero_force(simulation_factory, two_particle_snapshot_factory):
+    """Construct, attach, and check force/torque output."""
+
+    # Construct the Python object
+    nlist = hoomd.md.nlist.Cell(buffer=0.4)
+
+    domain = numpy.zeros((5, 2), dtype=numpy.float64)
+    terms = numpy.zeros((2, 6), dtype=numpy.uint32)
+    coeffs = numpy.zeros((2,), dtype=numpy.float64)
+    r0_data = numpy.zeros((2, 2, 2, 2, 2), dtype=numpy.float64)
+    r_cut = 3.0
+
+    pot = hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential(
+        nlist=nlist,
+        domain=domain,
+        r_cut=r_cut,
+        terms=terms,
+        coeffs=coeffs,
+        r0_data=r0_data,
+    )
+
+    # Pre-attach checks
+    assert numpy.isclose(pot.r_cut, r_cut)
+    assert pot.n_terms == 2
+    assert pot.r0_shape == (2, 2, 2, 2, 2)
+
+    # Attach via a 0-step simulation
+    snap = two_particle_snapshot_factory()
+    if snap.communicator.rank == 0:
+        snap.particles.position[:] = [[-0.5, 0.0, 0.0], [0.5, 0.0, 0.0]]
+        snap.particles.orientation[:] = [[1, 0, 0, 0], [1, 0, 0, 0]]
+        snap.particles.moment_inertia[:] = [0.1, 0.1, 0.1]
+
+    sim = simulation_factory(snap)
+
+    integrator = hoomd.md.Integrator(dt=0.001)
+    nve = hoomd.md.methods.ConstantVolume(hoomd.filter.All())
+    integrator.methods = [nve]
+
+    integrator.forces = [pot]
+    sim.operations.integrator = integrator
+
+    # Attach all objects
+    sim.run(0)
+
+    # After attach
+    assert hasattr(pot, "_cpp_obj")
+    assert pot._cpp_obj is not None
+
+    # Post-attach checks
+    assert numpy.isclose(pot.r_cut, r_cut)
+    assert pot.n_terms == 2
+    assert pot.r0_shape == (2, 2, 2, 2, 2)
+
+    assert pot._cpp_obj.n_terms == 2
+    assert numpy.isclose(pot._cpp_obj.r_cut, r_cut)
+
+    # Check Force/torque/energy outputs
+    if sim.device.communicator.rank == 0:
+        numpy.testing.assert_array_equal(pot.forces, numpy.zeros((2, 3)))
+        numpy.testing.assert_array_equal(pot.torques, numpy.zeros((2, 3)))
+        numpy.testing.assert_array_equal(pot.energies, numpy.zeros((2,)))

From 45a1740044c627952e645be430bc6b020c7bcdb2 Mon Sep 17 00:00:00 2001
From: Mohammadreza <mzf0069@auburn.edu>
Date: Thu, 19 Mar 2026 11:27:43 -0500
Subject: [PATCH 05/13] Update linear interpolation and begin computeForces implementation

---
 src/ChebyshevAnisotropicPairPotential.cc | 322 ++++++++++++++++++++++-
 src/ChebyshevAnisotropicPairPotential.h  |  14 +-
 src/LinearInterpolator5D.h               | 168 ++++++------
 src/pair.py                              |  46 ++--
 src/pytest/test_chebyshev.py             |  78 +++---
 5 files changed, 472 insertions(+), 156 deletions(-)

diff --git a/src/ChebyshevAnisotropicPairPotential.cc b/src/ChebyshevAnisotropicPairPotential.cc
index b77ae2f..bc0b04c 100644
--- a/src/ChebyshevAnisotropicPairPotential.cc
+++ b/src/ChebyshevAnisotropicPairPotential.cc
@@ -10,6 +10,14 @@
 #include "ChebyshevAnisotropicPairPotential.h"
 #include "LinearInterpolator5D.h"
 
+#include "hoomd/BoxDim.h"
+#include "hoomd/VectorMath.h"
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <stdexcept>
+
 namespace hoomd
     {
 namespace azplugins
@@ -19,7 +27,7 @@ ChebyshevAnisotropicPairPotential::ChebyshevAnisotropicPairPotential(
     std::shared_ptr<SystemDefinition> sysdef,
     std::shared_ptr<hoomd::md::NeighborList> nlist,
     const Scalar* domain,
-    const float r_cut,
+    const Scalar r_cut,
     const unsigned int* terms,
     const Scalar* coeffs,
     unsigned int Nterms,
@@ -27,26 +35,324 @@ ChebyshevAnisotropicPairPotential::ChebyshevAnisotropicPairPotential(
     const unsigned int* r0_shape)
     : ForceCompute(sysdef), m_nlist(nlist), m_r_cut(r_cut), m_Nterms(Nterms)
     {
+        {
+        GPUArray<Scalar2> domain_arr(5, m_exec_conf);
+        m_domain.swap(domain_arr);
+
+        ArrayHandle<Scalar2> h_domain(m_domain, access_location::host, access_mode::readwrite);
+        for (unsigned int d = 0; d < 5; ++d)
+            {
+            h_domain.data[d] = make_scalar2(domain[2 * d], domain[2 * d + 1]);
+            }
+        }
+
+        // terms: shape (Nterms, 6), stored flat
+        {
+        GPUArray<unsigned int> terms_arr(static_cast<size_t>(Nterms) * 6, m_exec_conf);
+        m_terms.swap(terms_arr);
+
+        ArrayHandle<unsigned int> h_terms(m_terms, access_location::host, access_mode::readwrite);
+        std::copy(terms, terms + static_cast<size_t>(Nterms) * 6, h_terms.data);
+        }
+
+        // coeffs: shape (Nterms,)
+        {
+        GPUArray<Scalar> coeffs_arr(Nterms, m_exec_conf);
+        m_coeffs.swap(coeffs_arr);
+
+        ArrayHandle<Scalar> h_coeffs(m_coeffs, access_location::host, access_mode::readwrite);
+        std::copy(coeffs, coeffs + Nterms, h_coeffs.data);
+        }
+
+        // r0_shape: length 5
+        {
+        GPUArray<unsigned int> shape_arr(5, m_exec_conf);
+        m_r0_shape.swap(shape_arr);
+
+        ArrayHandle<unsigned int> h_shape(m_r0_shape,
+                                          access_location::host,
+                                          access_mode::readwrite);
+        std::copy(r0_shape, r0_shape + 5, h_shape.data);
+        }
+
+    // r0_data: flat array, length = product(r0_shape)
+    size_t n_r0 = 1;
+    for (unsigned int d = 0; d < 5; ++d)
+        {
+        n_r0 *= static_cast<size_t>(r0_shape[d]);
+        }
+
+        {
+        GPUArray<Scalar> r0_arr(n_r0, m_exec_conf);
+        m_r0_data.swap(r0_arr);
+
+        ArrayHandle<Scalar> h_r0(m_r0_data, access_location::host, access_mode::readwrite);
+        std::copy(r0_data, r0_data + n_r0, h_r0.data);
+        }
     }
 
 ChebyshevAnisotropicPairPotential::~ChebyshevAnisotropicPairPotential() { }
 
 void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
     {
-    if (m_nlist)
+    // start by updating the neighborlist
+    m_nlist->compute(timestep);
+
+    // access neighbor list, particle data, and simulation box.
+    ArrayHandle<unsigned int> h_n_neigh(m_nlist->getNNeighArray(),
+                                        access_location::host,
+                                        access_mode::read);
+    ArrayHandle<unsigned int> h_nlist(m_nlist->getNListArray(),
+                                      access_location::host,
+                                      access_mode::read);
+    ArrayHandle<size_t> h_head_list(m_nlist->getHeadList(),
+                                    access_location::host,
+                                    access_mode::read);
+    ArrayHandle<Scalar4> h_pos(m_pdata->getPositions(), access_location::host, access_mode::read);
+    ArrayHandle<Scalar4> h_orientation(m_pdata->getOrientationArray(),
+                                       access_location::host,
+                                       access_mode::read);
+    ArrayHandle<Scalar2> h_domain(m_domain, access_location::host, access_mode::read);
+    ArrayHandle<Scalar> h_r0_data(m_r0_data, access_location::host, access_mode::read);
+    ArrayHandle<unsigned int> h_r0_shape(m_r0_shape, access_location::host, access_mode::read);
+
+    const BoxDim box = m_pdata->getGlobalBox();
+    const Scalar rcutsq = m_r_cut * m_r_cut;
+    const Scalar h = Scalar(1.0e-6);
+
+    Scalar lo[5];
+    Scalar hi[5];
+    for (unsigned int d = 0; d < 5; ++d)
         {
-        m_nlist->compute(timestep);
+        lo[d] = h_domain.data[d].x;
+        hi[d] = h_domain.data[d].y;
         }
 
-    const unsigned int N = m_pdata->getN();
+    LinearInterpolator5D<Scalar> interp(h_r0_data.data, h_r0_shape.data, lo, hi);
+
+    // need to start from a zero force and torque
+    m_force.zeroFill();
+    m_torque.zeroFill();
 
     ArrayHandle<Scalar4> h_force(m_force, access_location::host, access_mode::readwrite);
     ArrayHandle<Scalar4> h_torque(m_torque, access_location::host, access_mode::readwrite);
 
+    const unsigned int N = m_pdata->getN();
+
     for (unsigned int i = 0; i < N; ++i)
         {
-        h_force.data[i] = make_scalar4(Scalar(0), Scalar(0), Scalar(0), Scalar(0));
-        h_torque.data[i] = make_scalar4(Scalar(0), Scalar(0), Scalar(0), Scalar(0));
+        // particle i position and orientation
+        const Scalar3 pi = make_scalar3(h_pos.data[i].x, h_pos.data[i].y, h_pos.data[i].z);
+        const quat<Scalar> q_i(h_orientation.data[i]);
+        const quat<Scalar> q_i_conj = conj(q_i);
+
+        // initialize current particle force and torque
+        Scalar3 fi = make_scalar3(0, 0, 0);
+        Scalar3 ti = make_scalar3(0, 0, 0);
+
+        const size_t myHead = h_head_list.data[i];
+        const unsigned int size = (unsigned int)h_n_neigh.data[i];
+
+        for (unsigned int k = 0; k < size; ++k)
+            {
+            // access the index
+            const unsigned int j = h_nlist.data[myHead + k];
+            assert(j < m_pdata->getN() + m_pdata->getNGhosts());
+
+            const Scalar3 pj = make_scalar3(h_pos.data[j].x, h_pos.data[j].y, h_pos.data[j].z);
+            Scalar3 dx = pi - pj;
+            // apply periodic boundary conditions
+            dx = box.minImage(dx);
+
+            // cut-off check
+            const Scalar rsq = dot(dx, dx);
+            if (rsq > rcutsq)
+                {
+                continue;
+                }
+
+            // particle j, orientation quaternion
+            const quat<Scalar> q_j(h_orientation.data[j]);
+            // dx is in lab frame, so rotate dx by conj(q_i)
+            const vec3<Scalar> dx_lab(dx.x, dx.y, dx.z);
+            const vec3<Scalar> dx_body = rotate(q_i_conj, dx_lab);
+            // relative orientation of j with respect to i
+            const quat<Scalar> q_rel = q_i_conj * q_j;
+
+            // convert position to spherical coordinates
+            const Scalar r = fast::sqrt(dot(dx_body, dx_body));
+            Scalar theta = Scalar(0);
+            Scalar phi = Scalar(0);
+
+            if (r > Scalar(0))
+                {
+                theta = std::atan2(dx_body.y, dx_body.x);
+                if (theta < Scalar(0))
+                    {
+                    theta += Scalar(2.0) * M_PI;
+                    }
+
+                Scalar cosphi = dx_body.z / r;
+                if (cosphi < Scalar(-1))
+                    {
+                    cosphi = Scalar(-1);
+                    }
+                else if (cosphi > Scalar(1))
+                    {
+                    cosphi = Scalar(1);
+                    }
+
+                phi = std::acos(cosphi);
+                }
+
+            // get the columns of an active rotation matrix
+            const vec3<Scalar> ex = rotate(q_rel, vec3<Scalar>(1, 0, 0));
+            const vec3<Scalar> ey = rotate(q_rel, vec3<Scalar>(0, 1, 0));
+            const vec3<Scalar> ez = rotate(q_rel, vec3<Scalar>(0, 0, 1));
+
+            Scalar alpha = Scalar(0);
+            Scalar beta = Scalar(0);
+            Scalar gamma = Scalar(0);
+
+            // get the rotation angles by R_ZXZ (body-fixed) = R_q
+            if (ez.z < Scalar(-1))
+                {
+                beta = Scalar(M_PI);
+                }
+            else if (ez.z > Scalar(1))
+                {
+                beta = Scalar(0);
+                }
+            else
+                {
+                beta = std::acos(ez.z);
+                }
+
+            if (beta > Scalar(1e-7) && beta < Scalar(M_PI - 1e-7))
+                {
+                alpha = std::atan2(ez.x, -ez.y);
+                gamma = std::atan2(ex.z, ey.z);
+                }
+            else if (beta <= Scalar(1e-7))
+                {
+                alpha = Scalar(0);
+                gamma = std::atan2(ex.y, ex.x);
+                }
+            else
+                {
+                alpha = Scalar(0);
+                gamma = std::atan2(-ex.y, ex.x);
+                }
+
+            if (alpha < Scalar(0))
+                {
+                alpha += Scalar(2) * M_PI;
+                }
+            if (gamma < Scalar(0))
+                {
+                gamma += Scalar(2) * M_PI;
+                }
+
+            // compute r0 and its derivatives
+            const Scalar r0 = interp(theta, phi, alpha, beta, gamma);
+            Scalar dr0_dtheta = Scalar(0);
+            Scalar dr0_dphi = Scalar(0);
+            Scalar dr0_dalpha = Scalar(0);
+            Scalar dr0_dbeta = Scalar(0);
+            Scalar dr0_dgamma = Scalar(0);
+
+            // d r0 / d theta
+            if (theta - h < lo[0])
+                {
+                dr0_dtheta = (interp(theta + h, phi, alpha, beta, gamma) - r0) / h;
+                }
+            else if (theta + h > hi[0])
+                {
+                dr0_dtheta = (r0 - interp(theta - h, phi, alpha, beta, gamma)) / h;
+                }
+            else
+                {
+                dr0_dtheta = (interp(theta + h, phi, alpha, beta, gamma)
+                              - interp(theta - h, phi, alpha, beta, gamma))
+                             / (Scalar(2) * h);
+                }
+
+            // d r0 / d phi
+            if (phi - h < lo[1])
+                {
+                dr0_dphi = (interp(theta, phi + h, alpha, beta, gamma) - r0) / h;
+                }
+            else if (phi + h > hi[1])
+                {
+                dr0_dphi = (r0 - interp(theta, phi - h, alpha, beta, gamma)) / h;
+                }
+            else
+                {
+                dr0_dphi = (interp(theta, phi + h, alpha, beta, gamma)
+                            - interp(theta, phi - h, alpha, beta, gamma))
+                           / (Scalar(2) * h);
+                }
+
+            // d r0 / d alpha
+            if (alpha - h < lo[2])
+                {
+                dr0_dalpha = (interp(theta, phi, alpha + h, beta, gamma) - r0) / h;
+                }
+            else if (alpha + h > hi[2])
+                {
+                dr0_dalpha = (r0 - interp(theta, phi, alpha - h, beta, gamma)) / h;
+                }
+            else
+                {
+                dr0_dalpha = (interp(theta, phi, alpha + h, beta, gamma)
+                              - interp(theta, phi, alpha - h, beta, gamma))
+                             / (Scalar(2) * h);
+                }
+
+            // d r0 / d beta
+            if (beta - h < lo[3])
+                {
+                dr0_dbeta = (interp(theta, phi, alpha, beta + h, gamma) - r0) / h;
+                }
+            else if (beta + h > hi[3])
+                {
+                dr0_dbeta = (r0 - interp(theta, phi, alpha, beta - h, gamma)) / h;
+                }
+            else
+                {
+                dr0_dbeta = (interp(theta, phi, alpha, beta + h, gamma)
+                             - interp(theta, phi, alpha, beta - h, gamma))
+                            / (Scalar(2) * h);
+                }
+
+            // d r0 / d gamma
+            if (gamma - h < lo[4])
+                {
+                dr0_dgamma = (interp(theta, phi, alpha, beta, gamma + h) - r0) / h;
+                }
+            else if (gamma + h > hi[4])
+                {
+                dr0_dgamma = (r0 - interp(theta, phi, alpha, beta, gamma - h)) / h;
+                }
+            else
+                {
+                dr0_dgamma = (interp(theta, phi, alpha, beta, gamma + h)
+                              - interp(theta, phi, alpha, beta, gamma - h))
+                             / (Scalar(2) * h);
+                }
+
+            // compute J
+            }
+
+        h_force.data[i].x += fi.x;
+        h_force.data[i].y += fi.y;
+        h_force.data[i].z += fi.z;
+        h_force.data[i].w += Scalar(0.0);
+
+        h_torque.data[i].x += ti.x;
+        h_torque.data[i].y += ti.y;
+        h_torque.data[i].z += ti.z;
+        h_torque.data[i].w += Scalar(0.0);
         }
     }
 
@@ -67,7 +373,7 @@ void export_ChebyshevAnisotropicPairPotential(pybind11::module& m)
             [](std::shared_ptr<SystemDefinition> sysdef,
                std::shared_ptr<NL> nlist,
                py::array_t<Scalar, py::array::c_style | py::array::forcecast> domain,
-               float r_cut,
+               Scalar r_cut,
                py::array_t<unsigned int, py::array::c_style | py::array::forcecast> terms,
                py::array_t<Scalar, py::array::c_style | py::array::forcecast> coeffs,
                py::array_t<Scalar, py::array::c_style | py::array::forcecast> r0_data)
@@ -121,7 +427,7 @@ void export_ChebyshevAnisotropicPairPotential(pybind11::module& m)
                                                                            r0_shape.data());
             }))
         .def_property_readonly("r_cut", &ChebyshevAnisotropicPairPotential::getRCut)
-        .def_property_readonly("n_terms", &ChebyshevAnisotropicPairPotential::getNTerms);
+        .def_property_readonly("num_terms", &ChebyshevAnisotropicPairPotential::getNTerms);
     }
 
     } // end namespace detail
diff --git a/src/ChebyshevAnisotropicPairPotential.h b/src/ChebyshevAnisotropicPairPotential.h
index d054907..7fc549b 100644
--- a/src/ChebyshevAnisotropicPairPotential.h
+++ b/src/ChebyshevAnisotropicPairPotential.h
@@ -31,8 +31,6 @@ namespace hoomd
 namespace azplugins
     {
 
-template<typename T> class LinearInterpolator5D;
-
 class PYBIND11_EXPORT ChebyshevAnisotropicPairPotential : public ForceCompute
     {
     public:
@@ -40,7 +38,7 @@ class PYBIND11_EXPORT ChebyshevAnisotropicPairPotential : public ForceCompute
     ChebyshevAnisotropicPairPotential(std::shared_ptr<SystemDefinition> sysdef,
                                       std::shared_ptr<hoomd::md::NeighborList> nlist,
                                       const Scalar* domain,
-                                      const float r_cut,
+                                      const Scalar r_cut,
                                       const unsigned int* terms,
                                       const Scalar* coeffs,
                                       unsigned int Nterms,
@@ -63,7 +61,7 @@ class PYBIND11_EXPORT ChebyshevAnisotropicPairPotential : public ForceCompute
         }
 
     /// Read-only cutoff radius
-    const float getRCut() const
+    Scalar getRCut() const
         {
         return m_r_cut;
         }
@@ -81,7 +79,7 @@ class PYBIND11_EXPORT ChebyshevAnisotropicPairPotential : public ForceCompute
 
     GPUArray<Scalar2> m_domain; //!< Approximation domain (5x2): 5 rows, each is (min, max)
 
-    float m_r_cut; //!< cut-off distance in approximation domain
+    Scalar m_r_cut; //!< cut-off distance in approximation domain
 
     GPUArray<unsigned int> m_terms; //!< Chebyshev term list (Nterms x 6)
 
@@ -94,12 +92,6 @@ class PYBIND11_EXPORT ChebyshevAnisotropicPairPotential : public ForceCompute
     GPUArray<unsigned int> m_r0_shape; //!< Number of points used along each dimension to sample r0
     };
 
-namespace detail
-    {
-///! exports to Python
-void export_ChebyshevAnisotropicPairPotential(pybind11::module& m);
-    } // end namespace detail
-
     } // end namespace azplugins
     } // end namespace hoomd
 
diff --git a/src/LinearInterpolator5D.h b/src/LinearInterpolator5D.h
index 9e1f292..7340434 100644
--- a/src/LinearInterpolator5D.h
+++ b/src/LinearInterpolator5D.h
@@ -27,8 +27,10 @@ namespace azplugins
 class FiveDimensionalIndex
     {
     public:
-    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE FiveDimensionalIndex()
-        : n0_(0), n1_(0), n2_(0), n3_(0), n4_(0)
+    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE FiveDimensionalIndex() : m_n {0, 0, 0, 0, 0} { }
+
+    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE explicit FiveDimensionalIndex(const unsigned int* n)
+        : m_n {n[0], n[1], n[2], n[3], n[4]}
         {
         }
 
@@ -37,7 +39,7 @@ class FiveDimensionalIndex
                                                                     unsigned int n2,
                                                                     unsigned int n3,
                                                                     unsigned int n4)
-        : n0_(n0), n1_(n1), n2_(n2), n3_(n3), n4_(n4)
+        : m_n {n0, n1, n2, n3, n4}
         {
         }
 
@@ -48,139 +50,153 @@ class FiveDimensionalIndex
                                                                        unsigned int i4) const
         {
         unsigned int idx = i0;
-        idx = idx * n1_ + i1;
-        idx = idx * n2_ + i2;
-        idx = idx * n3_ + i3;
-        idx = idx * n4_ + i4;
+        idx = idx * m_n[1] + i1;
+        idx = idx * m_n[2] + i2;
+        idx = idx * m_n[3] + i3;
+        idx = idx * m_n[4] + i4;
         return idx;
         }
 
     AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE unsigned int size() const
         {
-        return n0_ * n1_ * n2_ * n3_ * n4_;
+        return m_n[0] * m_n[1] * m_n[2] * m_n[3] * m_n[4];
+        }
+
+    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE unsigned int getN(unsigned int dim) const
+        {
+        return m_n[dim];
         }
 
     private:
-    unsigned int n0_, n1_, n2_, n3_, n4_;
+    unsigned int m_n[5];
     };
 
-// T is the stored data type.
+/*! \brief 5D multilinear interpolation on a uniform rectilinear grid.
+
+    This is an extension of three-dimensional linear interpolation
+    from (https://github.com/mphowardlab/flyft/blob/main/src/grid_interpolator.cc).
+
+*/
 template<typename T> class LinearInterpolator5D
     {
     public:
-    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE LinearInterpolator5D() : data_(nullptr), nindex_()
+    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE LinearInterpolator5D() : m_data(nullptr), m_indexer()
         {
         for (int d = 0; d < 5; ++d)
             {
-            n_[d] = 0;
-            lo_[d] = Scalar(0);
-            hi_[d] = Scalar(0);
-            dx_[d] = Scalar(0);
+            m_lo[d] = Scalar(0);
+            m_hi[d] = Scalar(0);
+            m_dx[d] = Scalar(0);
             }
         }
 
     AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE
     LinearInterpolator5D(const T* data, const unsigned int* n, const Scalar* lo, const Scalar* hi)
-        : data_(data)
+        : m_data(data), m_indexer(n)
         {
         for (int d = 0; d < 5; ++d)
             {
-            n_[d] = n[d];
-            lo_[d] = lo[d];
-            hi_[d] = hi[d];
+            const unsigned int nd = n[d];
+            assert(nd >= 2);
 
-            assert(n_[d] >= 2);
-            dx_[d] = (hi_[d] - lo_[d]) / Scalar(n_[d] - 1);
+            m_lo[d] = lo[d];
+            m_hi[d] = hi[d];
+            m_dx[d] = (m_hi[d] - m_lo[d]) / Scalar(nd - 1);
             }
 
-        nindex_ = FiveDimensionalIndex(n_[0], n_[1], n_[2], n_[3], n_[4]);
+        assert(m_indexer.size() > 0);
         }
 
-    // Interpolate at (x0, x1, x2, x3, x4).
+    //! Interpolate at (x0, x1, x2, x3, x4).
     AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE Scalar
     operator()(Scalar x0, Scalar x1, Scalar x2, Scalar x3, Scalar x4) const
         {
         const Scalar x[5] = {x0, x1, x2, x3, x4};
 
-        Scalar f[5];
-        for (int d = 0; d < 5; ++d)
-            {
-            f[d] = (x[d] - lo_[d]) / dx_[d];
-            }
-
+        // Compute the cell bin and fractional coordinate in each dimension.
         int bin[5];
-        Scalar dloc[5];
+        Scalar frac[5];
 
-        for (int dim = 0; dim < 5; ++dim)
+        for (int d = 0; d < 5; ++d)
             {
-            bin[dim] = (int)std::floor((double)f[dim]);
+            const unsigned int nd = m_indexer.getN(static_cast<unsigned int>(d));
+            const Scalar f = (x[d] - m_lo[d]) / m_dx[d];
+
+            int b = static_cast<int>(std::floor(static_cast<double>(f)));
 
-            if (f[dim] == Scalar(n_[dim] - 1) && x[dim] == hi_[dim])
+            // If exactly at the top boundary, shift into the last valid cell so
+            // that (b+1) remains in bounds.
+            if (f == Scalar(nd - 1) && x[d] == m_hi[d])
                 {
-                --bin[dim];
+                --b;
                 }
 
-            dloc[dim] = f[dim] - Scalar(bin[dim]);
+            assert(b >= 0);
+            assert(b < static_cast<int>(nd) - 1);
 
-            assert(bin[dim] >= 0);
-            assert(bin[dim] < (int)n_[dim] - 1);
+            bin[d] = b;
+            frac[d] = f - Scalar(b);
             }
 
-        const Scalar xd = dloc[0];
-        const Scalar yd = dloc[1];
-        const Scalar zd = dloc[2];
-        const Scalar wd = dloc[3];
-        const Scalar vd = dloc[4];
-
+        // Load the 2^5=32 corners of the surrounding 5D cell.
+        Scalar c0[32];
         Scalar c[32];
-        for (unsigned int mask = 0; mask < 32; ++mask)
-            {
-            const unsigned int i0 = (unsigned int)(bin[0] + ((mask >> 0) & 1u));
-            const unsigned int i1 = (unsigned int)(bin[1] + ((mask >> 1) & 1u));
-            const unsigned int i2 = (unsigned int)(bin[2] + ((mask >> 2) & 1u));
-            const unsigned int i3 = (unsigned int)(bin[3] + ((mask >> 3) & 1u));
-            const unsigned int i4 = (unsigned int)(bin[4] + ((mask >> 4) & 1u));
-
-            c[mask] = Scalar(data_[nindex_(i0, i1, i2, i3, i4)]);
-            }
 
-        Scalar c0[16];
-        for (unsigned int i = 0; i < 16; ++i)
+        for (unsigned int mask = 0; mask < 32; ++mask)
             {
-            c0[i] = c[2 * i] * (Scalar(1) - xd) + c[2 * i + 1] * xd;
+            const unsigned int i0
+                = static_cast<unsigned int>(bin[0] + static_cast<int>((mask >> 0) & 1u));
+            const unsigned int i1
+                = static_cast<unsigned int>(bin[1] + static_cast<int>((mask >> 1) & 1u));
+            const unsigned int i2
+                = static_cast<unsigned int>(bin[2] + static_cast<int>((mask >> 2) & 1u));
+            const unsigned int i3
+                = static_cast<unsigned int>(bin[3] + static_cast<int>((mask >> 3) & 1u));
+            const unsigned int i4
+                = static_cast<unsigned int>(bin[4] + static_cast<int>((mask >> 4) & 1u));
+
+            // Implicit conversion from T to Scalar is intended.
+            c0[mask] = m_data[m_indexer(i0, i1, i2, i3, i4)];
             }
 
-        Scalar c1[8];
-        for (unsigned int i = 0; i < 8; ++i)
-            {
-            c1[i] = c0[2 * i] * (Scalar(1) - yd) + c0[2 * i + 1] * yd;
-            }
+        // For each dimension d, collapse pairs of points that differ in bit d.
+        Scalar* in = c0;
+        Scalar* out = c;
+        unsigned int len = 32;
 
-        Scalar c2[4];
-        for (unsigned int i = 0; i < 4; ++i)
+        for (int d = 0; d < 5; ++d)
             {
-            c2[i] = c1[2 * i] * (Scalar(1) - zd) + c1[2 * i + 1] * zd;
-            }
+            const Scalar t = frac[d];
+            const Scalar omt = Scalar(1) - t;
+            const unsigned int out_len = len / 2;
 
-        Scalar c3[2];
-        for (unsigned int i = 0; i < 2; ++i)
-            {
-            c3[i] = c2[2 * i] * (Scalar(1) - wd) + c2[2 * i + 1] * wd;
+            for (unsigned int i = 0; i < out_len; ++i)
+                {
+                out[i] = in[2 * i] * omt + in[2 * i + 1] * t;
+                }
+            // Swap input/output
+            Scalar* tmp = in;
+            in = out;
+            out = tmp;
+            len = out_len;
             }
 
-        return c3[0] * (Scalar(1) - vd) + c3[1] * vd;
+        // After 5 reductions, len==1 and in[0] holds the interpolated value.
+        return in[0];
         }
 
     private:
-    const T* data_;
-    unsigned int n_[5];
-    Scalar lo_[5];
-    Scalar hi_[5];
-    Scalar dx_[5];
-    FiveDimensionalIndex nindex_;
+    const T* m_data;
+    Scalar m_lo[5];
+    Scalar m_hi[5];
+    Scalar m_dx[5];
+    FiveDimensionalIndex m_indexer;
     };
 
     } // namespace azplugins
     } // namespace hoomd
 
+#undef AZPLUGINS_HOSTDEVICE
+#undef AZPLUGINS_FORCEINLINE
+
 #endif // AZPLUGINS_LINEAR_INTERPOLATOR_5D_H_
diff --git a/src/pair.py b/src/pair.py
index 8e982ae..56319f1 100644
--- a/src/pair.py
+++ b/src/pair.py
@@ -16,40 +16,32 @@
 class ChebyshevAnisotropicPairPotential(Force):
     """Chebyshev anisotropic pair potential."""
 
-    def __init__(self, nlist, domain, terms, coeffs, r0_data, r_cut=3.0):
+    def __init__(self, nlist, domain, terms, coeffs, r0, r_cut):
         super().__init__()
+
         self._nlist = nlist
 
-        self._domain = numpy.asarray(domain)
-        self._r_cut = float(r_cut)
+        param_dict = ParameterDict(r_cut=float)
+        param_dict["r_cut"] = float(r_cut)
+        self._param_dict.update(param_dict)
+
+        self._domain = numpy.asarray(domain, dtype=numpy.float64)
         self._terms = numpy.asarray(terms, dtype=numpy.uint32)
-        self._coeffs = numpy.asarray(coeffs)
-        self._r0_data = numpy.asarray(r0_data)
+        self._coeffs = numpy.asarray(coeffs, dtype=numpy.float64)
+
+        self.r0 = numpy.asarray(r0, dtype=numpy.float64)
 
         if self._domain.shape != (5, 2):
-            raise ValueError("domain must have shape (5, 2).")
+            raise ValueError("domain must have shape (5,2).")
         if self._terms.ndim != 2 or self._terms.shape[1] != 6:
-            raise ValueError("terms must have shape (Nterms, 6).")
-        nterms = int(self._terms.shape[0])
-        if self._coeffs.ndim != 1 or int(self._coeffs.shape[0]) != nterms:
-            raise ValueError("coeffs must have shape (Nterms,).")
-        if self._r0_data.ndim != 5:
-            raise ValueError("r0_data must be a 5D array.")
-
-    @property
-    def r_cut(self):
-        """Cut-off distance in approximation domain"""
-        return self._r_cut
+            raise ValueError("terms must have shape (Nterms,6).")
 
-    @property
-    def n_terms(self):
-        """Number of terms."""
-        return int(self._terms.shape[0])
+        n_terms = int(self._terms.shape[0])
+        if self._coeffs.ndim != 1 or int(self._coeffs.shape[0]) != n_terms:
+            raise ValueError("coeffs must have shape (Nterms,).")
 
-    @property
-    def r0_shape(self):
-        """r0 table shape."""
-        return tuple(int(x) for x in self._r0_data.shape)
+        if self.r0.ndim != 5:
+            raise ValueError("r0 must be a 5D array.")
 
     def _attach_hook(self):
         self._nlist._attach(self._simulation)
@@ -58,10 +50,10 @@ def _attach_hook(self):
             self._simulation.state._cpp_sys_def,
             self._nlist._cpp_obj,
             self._domain,
-            self._r_cut,
+            self.r_cut,
             self._terms,
             self._coeffs,
-            self._r0_data,
+            self.r0,
         )
 
         super()._attach_hook()
diff --git a/src/pytest/test_chebyshev.py b/src/pytest/test_chebyshev.py
index c0b16cc..30a3f51 100644
--- a/src/pytest/test_chebyshev.py
+++ b/src/pytest/test_chebyshev.py
@@ -7,33 +7,11 @@
 import hoomd.azplugins
 
 
-def test_attach_and_zero_force(simulation_factory, two_particle_snapshot_factory):
+def test_chebyshev_construct_attach_zero(
+    simulation_factory, two_particle_snapshot_factory
+):
     """Construct, attach, and check force/torque output."""
 
-    # Construct the Python object
-    nlist = hoomd.md.nlist.Cell(buffer=0.4)
-
-    domain = numpy.zeros((5, 2), dtype=numpy.float64)
-    terms = numpy.zeros((2, 6), dtype=numpy.uint32)
-    coeffs = numpy.zeros((2,), dtype=numpy.float64)
-    r0_data = numpy.zeros((2, 2, 2, 2, 2), dtype=numpy.float64)
-    r_cut = 3.0
-
-    pot = hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential(
-        nlist=nlist,
-        domain=domain,
-        r_cut=r_cut,
-        terms=terms,
-        coeffs=coeffs,
-        r0_data=r0_data,
-    )
-
-    # Pre-attach checks
-    assert numpy.isclose(pot.r_cut, r_cut)
-    assert pot.n_terms == 2
-    assert pot.r0_shape == (2, 2, 2, 2, 2)
-
-    # Attach via a 0-step simulation
     snap = two_particle_snapshot_factory()
     if snap.communicator.rank == 0:
         snap.particles.position[:] = [[-0.5, 0.0, 0.0], [0.5, 0.0, 0.0]]
@@ -46,25 +24,57 @@ def test_attach_and_zero_force(simulation_factory, two_particle_snapshot_factory
     nve = hoomd.md.methods.ConstantVolume(hoomd.filter.All())
     integrator.methods = [nve]
 
+    nlist = hoomd.md.nlist.Cell(buffer=0.4)
+
+    domain = numpy.asarray(
+        [
+            [0.0, 2.0 * numpy.pi],  # theta
+            [0.0, numpy.pi],  # phi
+            [0.0, 2.0 * numpy.pi],  # alpha
+            [0.0, numpy.pi],  # beta
+            [0.0, 2.0 * numpy.pi],  # gamma
+        ],
+        dtype=numpy.float64,
+    )
+
+    terms = numpy.asarray(
+        [
+            [0, 0, 0, 0, 0, 0],
+            [1, 0, 2, 0, 1, 3],
+        ],
+        dtype=numpy.uint32,
+    )
+
+    coeffs = numpy.asarray([1.0, -0.25], dtype=numpy.float64)
+
+    # r0 must be 5D (and each dimension >= 2)
+    r0 = (numpy.arange(32, dtype=numpy.float64).reshape((2, 2, 2, 2, 2))) * 0.01
+
+    r_cut = 3.0
+
+    pot = hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential(
+        nlist=nlist, domain=domain, terms=terms, coeffs=coeffs, r0=r0, r_cut=r_cut
+    )
+
+    assert numpy.isclose(pot.r_cut, r_cut)
+    assert isinstance(pot.r0, numpy.ndarray)
+    assert pot.r0.ndim == 5
+    assert pot.r0.shape == (2, 2, 2, 2, 2)
+
     integrator.forces = [pot]
     sim.operations.integrator = integrator
 
-    # Attach all objects
+    # attach
     sim.run(0)
 
-    # After attach
+    # check if attach happened
     assert hasattr(pot, "_cpp_obj")
     assert pot._cpp_obj is not None
 
-    # Post-attach checks
+    # recheck key properties after attach
     assert numpy.isclose(pot.r_cut, r_cut)
-    assert pot.n_terms == 2
-    assert pot.r0_shape == (2, 2, 2, 2, 2)
-
-    assert pot._cpp_obj.n_terms == 2
-    assert numpy.isclose(pot._cpp_obj.r_cut, r_cut)
+    assert pot.r0.shape == (2, 2, 2, 2, 2)
 
-    # Check Force/torque/energy outputs
     if sim.device.communicator.rank == 0:
         numpy.testing.assert_array_equal(pot.forces, numpy.zeros((2, 3)))
         numpy.testing.assert_array_equal(pot.torques, numpy.zeros((2, 3)))

From 3c07536137af05872f9ec4be485139552cd04162 Mon Sep 17 00:00:00 2001
From: Mohammadreza <mzf0069@auburn.edu>
Date: Fri, 10 Apr 2026 13:54:09 -0500
Subject: [PATCH 06/13] Implement force and torque calculation

---
 src/ChebyshevAnisotropicPairPotential.cc | 439 ++++++++++++++++-------
 src/ChebyshevAnisotropicPairPotential.h  |  26 +-
 src/LinearInterpolator5D.h               | 109 +++++-
 src/pytest/test_chebyshev.py             | 226 +++++++++++-
 4 files changed, 631 insertions(+), 169 deletions(-)

diff --git a/src/ChebyshevAnisotropicPairPotential.cc b/src/ChebyshevAnisotropicPairPotential.cc
index bc0b04c..b0a122e 100644
--- a/src/ChebyshevAnisotropicPairPotential.cc
+++ b/src/ChebyshevAnisotropicPairPotential.cc
@@ -3,26 +3,55 @@
 // Part of azplugins, released under the BSD 3-Clause License.
 
 /*!
- * \file ChebyshevAnisotropicPairPotential.h
+ * \file ChebyshevAnisotropicPairPotential.cc
  * \brief Definition of ChebyshevAnisotropicPairPotential
  */
 
 #include "ChebyshevAnisotropicPairPotential.h"
 #include "LinearInterpolator5D.h"
 
-#include "hoomd/BoxDim.h"
-#include "hoomd/VectorMath.h"
-
-#include <algorithm>
-#include <array>
-#include <cmath>
-#include <stdexcept>
-
 namespace hoomd
     {
 namespace azplugins
     {
 
+//! Scale a coordinate from [lo, hi] to the Chebyshev domain [-1, 1].
+static inline Scalar scaleToChebDomain(Scalar x, Scalar lo, Scalar hi)
+    {
+    return (Scalar(2) * (x - lo) / (hi - lo)) - Scalar(1);
+    }
+
+//! Evaluate Chebyshev polynomials of the first kind and their derivatives
+//! from degree 0 up to max_deg, using the three-term recurrence relation.
+/*!
+    T_0(x) = 1                       T'_0(x) = 0
+    T_1(x) = x                       T'_1(x) = 1
+    T_{n+1}(x) = 2x T_n - T_{n-1}   T'_{n+1}(x) = 2 T_n + 2x T'_n - T'_{n-1}
+
+    \param x        Evaluation point in [-1, 1]
+    \param max_deg  Highest polynomial degree to compute
+    \param T        Output: T[n] = T_n(x)  for n = 0 .. max_deg  (size >= max_deg+1)
+    \param dT       Output: dT[n] = T'_n(x) for n = 0 .. max_deg (size >= max_deg+1)
+*/
+static inline void evaluateChebyshev(Scalar x, unsigned int max_deg, Scalar* T, Scalar* dT)
+    {
+    T[0] = Scalar(1);
+    dT[0] = Scalar(0);
+
+    if (max_deg == 0)
+        return;
+
+    T[1] = x;
+    dT[1] = Scalar(1);
+
+    const Scalar two_x = Scalar(2) * x;
+    for (unsigned int n = 1; n < max_deg; ++n)
+        {
+        T[n + 1] = two_x * T[n] - T[n - 1];
+        dT[n + 1] = Scalar(2) * T[n] + two_x * dT[n] - dT[n - 1];
+        }
+    }
+
 ChebyshevAnisotropicPairPotential::ChebyshevAnisotropicPairPotential(
     std::shared_ptr<SystemDefinition> sysdef,
     std::shared_ptr<hoomd::md::NeighborList> nlist,
@@ -39,10 +68,11 @@ ChebyshevAnisotropicPairPotential::ChebyshevAnisotropicPairPotential(
         GPUArray<Scalar2> domain_arr(5, m_exec_conf);
         m_domain.swap(domain_arr);
 
+        const Index2D domain_index(2, 5);
         ArrayHandle<Scalar2> h_domain(m_domain, access_location::host, access_mode::readwrite);
         for (unsigned int d = 0; d < 5; ++d)
             {
-            h_domain.data[d] = make_scalar2(domain[2 * d], domain[2 * d + 1]);
+            h_domain.data[d] = make_scalar2(domain[domain_index(0, d)], domain[domain_index(1, d)]);
             }
         }
 
@@ -76,10 +106,10 @@ ChebyshevAnisotropicPairPotential::ChebyshevAnisotropicPairPotential(
         }
 
     // r0_data: flat array, length = product(r0_shape)
-    size_t n_r0 = 1;
+    unsigned int n_r0 = 1;
     for (unsigned int d = 0; d < 5; ++d)
         {
-        n_r0 *= static_cast<size_t>(r0_shape[d]);
+        n_r0 *= r0_shape[d];
         }
 
         {
@@ -89,9 +119,38 @@ ChebyshevAnisotropicPairPotential::ChebyshevAnisotropicPairPotential(
         ArrayHandle<Scalar> h_r0(m_r0_data, access_location::host, access_mode::readwrite);
         std::copy(r0_data, r0_data + n_r0, h_r0.data);
         }
+
+    // neighbor list subscriber: the neighbor list looks up one r_cut per type pair, so the
+    // matrix needs ntypes * ntypes entries (a single entry is only valid for one type)
+    Scalar max_r0 = *std::max_element(r0_data, r0_data + n_r0);
+    m_nlist_r_cut = std::ceil(max_r0 + m_r_cut);
+
+    const unsigned int n_pairs = m_pdata->getNTypes() * m_pdata->getNTypes();
+    m_r_cut_nlist = std::make_shared<GPUArray<Scalar>>(n_pairs, m_exec_conf);
+        {
+        ArrayHandle<Scalar> h_r_cut(*m_r_cut_nlist, access_location::host, access_mode::overwrite);
+        std::fill(h_r_cut.data, h_r_cut.data + n_pairs, m_nlist_r_cut);
+        }
+    m_nlist->addRCutMatrix(m_r_cut_nlist);
+    m_nlist->notifyRCutMatrixChange();
     }
 
-ChebyshevAnisotropicPairPotential::~ChebyshevAnisotropicPairPotential() { }
+ChebyshevAnisotropicPairPotential::~ChebyshevAnisotropicPairPotential()
+    {
+    if (m_attached)
+        {
+        m_nlist->removeRCutMatrix(m_r_cut_nlist);
+        }
+    }
+
+void ChebyshevAnisotropicPairPotential::notifyDetach()
+    {
+    if (m_attached)
+        {
+        m_nlist->removeRCutMatrix(m_r_cut_nlist);
+        }
+    m_attached = false;
+    }
 
 void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
     {
@@ -115,20 +174,43 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
     ArrayHandle<Scalar2> h_domain(m_domain, access_location::host, access_mode::read);
     ArrayHandle<Scalar> h_r0_data(m_r0_data, access_location::host, access_mode::read);
     ArrayHandle<unsigned int> h_r0_shape(m_r0_shape, access_location::host, access_mode::read);
+    ArrayHandle<unsigned int> h_terms(m_terms, access_location::host, access_mode::read);
+    ArrayHandle<Scalar> h_coeffs(m_coeffs, access_location::host, access_mode::read);
 
     const BoxDim box = m_pdata->getGlobalBox();
-    const Scalar rcutsq = m_r_cut * m_r_cut;
-    const Scalar h = Scalar(1.0e-6);
+    const Scalar nlist_rcutsq = m_nlist_r_cut * m_nlist_r_cut;
+    const Scalar fd_step = Scalar(1.0e-6);
+
+    LinearInterpolator5D<Scalar> interp(h_r0_data.data, h_r0_shape.data, h_domain.data);
+
+    // determine the maximum Chebyshev degree needed for each of the 6 coordinates.
+    unsigned int max_deg[6] = {0, 0, 0, 0, 0, 0};
+    for (unsigned int t = 0; t < m_Nterms; ++t)
+        {
+        for (unsigned int c = 0; c < 6; ++c)
+            {
+            const unsigned int deg = h_terms.data[t * 6 + c];
+            if (deg > max_deg[c])
+                max_deg[c] = deg;
+            }
+        }
 
-    Scalar lo[5];
-    Scalar hi[5];
+    // chain-rule scale factors: d(x_scaled)/d(x) = 2 / (hi - lo).
+    Scalar cheb_scale[6];
+    cheb_scale[0] = Scalar(2);
     for (unsigned int d = 0; d < 5; ++d)
         {
-        lo[d] = h_domain.data[d].x;
-        hi[d] = h_domain.data[d].y;
+        cheb_scale[d + 1] = Scalar(2) / (h_domain.data[d].y - h_domain.data[d].x);
         }
 
-    LinearInterpolator5D<Scalar> interp(h_r0_data.data, h_r0_shape.data, lo, hi);
+    // pre-allocate Chebyshev evaluation storage.
+    std::vector<Scalar> cheb_T[6];
+    std::vector<Scalar> cheb_dT[6];
+    for (unsigned int c = 0; c < 6; ++c)
+        {
+        cheb_T[c].resize(max_deg[c] + 1);
+        cheb_dT[c].resize(max_deg[c] + 1);
+        }
 
     // need to start from a zero force and torque
     m_force.zeroFill();
@@ -139,16 +221,23 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
 
     const unsigned int N = m_pdata->getN();
 
+    //! Euler-angle singularity tolerance for the alpha/gamma extraction.
+    const Scalar euler_singularity_tol = Scalar(1e-7);
+
+    //! beta threshold for the Jacobian (avoids 1/sin(beta) singularity).
+    const Scalar beta_tol = Scalar(1e-5);
+
     for (unsigned int i = 0; i < N; ++i)
         {
         // particle i position and orientation
-        const Scalar3 pi = make_scalar3(h_pos.data[i].x, h_pos.data[i].y, h_pos.data[i].z);
+        const Scalar3 pos_i = make_scalar3(h_pos.data[i].x, h_pos.data[i].y, h_pos.data[i].z);
         const quat<Scalar> q_i(h_orientation.data[i]);
         const quat<Scalar> q_i_conj = conj(q_i);
 
-        // initialize current particle force and torque
+        // initialize particle force, torque, and energy
         Scalar3 fi = make_scalar3(0, 0, 0);
         Scalar3 ti = make_scalar3(0, 0, 0);
+        Scalar pei = Scalar(0);
 
         const size_t myHead = h_head_list.data[i];
         const unsigned int size = (unsigned int)h_n_neigh.data[i];
@@ -159,14 +248,14 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
             const unsigned int j = h_nlist.data[myHead + k];
             assert(j < m_pdata->getN() + m_pdata->getNGhosts());
 
-            const Scalar3 pj = make_scalar3(h_pos.data[j].x, h_pos.data[j].y, h_pos.data[j].z);
-            Scalar3 dx = pi - pj;
+            const Scalar3 pos_j = make_scalar3(h_pos.data[j].x, h_pos.data[j].y, h_pos.data[j].z);
+            Scalar3 dx = pos_i - pos_j;
             // apply periodic boundary conditions
             dx = box.minImage(dx);
 
-            // cut-off check
+            // Neighbor-list cutoff check (center-center distance).
             const Scalar rsq = dot(dx, dx);
-            if (rsq > rcutsq)
+            if (rsq > nlist_rcutsq)
                 {
                 continue;
                 }
@@ -174,9 +263,12 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
             // particle j, orientation quaternion
             const quat<Scalar> q_j(h_orientation.data[j]);
             // dx is in lab frame, so rotate dx by conj(q_i)
-            const vec3<Scalar> dx_lab(dx.x, dx.y, dx.z);
-            const vec3<Scalar> dx_body = rotate(q_i_conj, dx_lab);
-            // relative orientation of j with respect to i
+            const vec3<Scalar> dx_body = rotate(q_i_conj, vec3<Scalar>(dx));
+
+            // Relative orientation of j with respect to i:
+            //     q_rel = conj(q_i) * q_j
+            // ref:
+            // https://www.mathworks.com/help/fusion/ug/rotations-orientation-and-quaternions.html
             const quat<Scalar> q_rel = q_i_conj * q_j;
 
             // convert position to spherical coordinates
@@ -184,170 +276,243 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
             Scalar theta = Scalar(0);
             Scalar phi = Scalar(0);
 
-            if (r > Scalar(0))
+            // skip overlapping particles.
+            if (r < Scalar(1e-12))
                 {
-                theta = std::atan2(dx_body.y, dx_body.x);
-                if (theta < Scalar(0))
-                    {
-                    theta += Scalar(2.0) * M_PI;
-                    }
+                continue;
+                }
 
-                Scalar cosphi = dx_body.z / r;
-                if (cosphi < Scalar(-1))
-                    {
-                    cosphi = Scalar(-1);
-                    }
-                else if (cosphi > Scalar(1))
-                    {
-                    cosphi = Scalar(1);
-                    }
+            theta = std::atan2(dx_body.y, dx_body.x);
+            if (theta < Scalar(0))
+                {
+                theta += Scalar(2.0) * M_PI;
+                }
 
-                phi = std::acos(cosphi);
+            Scalar cosphi = dx_body.z / r;
+            if (cosphi < Scalar(-1))
+                {
+                cosphi = Scalar(-1);
+                }
+            else if (cosphi > Scalar(1))
+                {
+                cosphi = Scalar(1);
                 }
+            phi = std::acos(cosphi);
 
-            // get the columns of an active rotation matrix
-            const vec3<Scalar> ex = rotate(q_rel, vec3<Scalar>(1, 0, 0));
-            const vec3<Scalar> ey = rotate(q_rel, vec3<Scalar>(0, 1, 0));
-            const vec3<Scalar> ez = rotate(q_rel, vec3<Scalar>(0, 0, 1));
+            // Build rotation matrix from the relative quaternion and extract
+            // ZXZ Euler angles (alpha, beta, gamma).
+            const rotmat3<Scalar> R(q_rel);
 
             Scalar alpha = Scalar(0);
             Scalar beta = Scalar(0);
             Scalar gamma = Scalar(0);
 
-            // get the rotation angles by R_ZXZ (body-fixed) = R_q
-            if (ez.z < Scalar(-1))
+            if (R.row2.z < Scalar(-1))
                 {
                 beta = Scalar(M_PI);
                 }
-            else if (ez.z > Scalar(1))
+            else if (R.row2.z > Scalar(1))
                 {
                 beta = Scalar(0);
                 }
             else
                 {
-                beta = std::acos(ez.z);
+                beta = std::acos(R.row2.z);
                 }
 
-            if (beta > Scalar(1e-7) && beta < Scalar(M_PI - 1e-7))
+            if (beta > euler_singularity_tol && beta < Scalar(M_PI) - euler_singularity_tol)
                 {
-                alpha = std::atan2(ez.x, -ez.y);
-                gamma = std::atan2(ex.z, ey.z);
-                }
-            else if (beta <= Scalar(1e-7))
-                {
-                alpha = Scalar(0);
-                gamma = std::atan2(ex.y, ex.x);
+                alpha = std::atan2(R.row0.z, -R.row1.z);
+                gamma = std::atan2(R.row2.x, R.row2.y);
+                if (alpha < Scalar(0))
+                    {
+                    alpha += Scalar(2) * M_PI;
+                    }
                 }
             else
                 {
                 alpha = Scalar(0);
-                gamma = std::atan2(-ex.y, ex.x);
+                // gimbal lock: sin(gamma) is +/-R.row1.x; R.row0.y has the wrong sign at beta ~ 0
+                gamma = std::atan2(beta <= euler_singularity_tol ? R.row1.x : -R.row1.x, R.row0.x);
                 }
 
-            if (alpha < Scalar(0))
-                {
-                alpha += Scalar(2) * M_PI;
-                }
             if (gamma < Scalar(0))
                 {
                 gamma += Scalar(2) * M_PI;
                 }
 
-            // compute r0 and its derivatives
-            const Scalar r0 = interp(theta, phi, alpha, beta, gamma);
-            Scalar dr0_dtheta = Scalar(0);
-            Scalar dr0_dphi = Scalar(0);
-            Scalar dr0_dalpha = Scalar(0);
-            Scalar dr0_dbeta = Scalar(0);
-            Scalar dr0_dgamma = Scalar(0);
-
-            // d r0 / d theta
-            if (theta - h < lo[0])
-                {
-                dr0_dtheta = (interp(theta + h, phi, alpha, beta, gamma) - r0) / h;
-                }
-            else if (theta + h > hi[0])
+            // move beta away from 0 and pi to avoid 1/sin(beta)
+            // singularity in the Jacobian.
+            if (beta < beta_tol)
                 {
-                dr0_dtheta = (r0 - interp(theta - h, phi, alpha, beta, gamma)) / h;
+                beta = beta_tol;
                 }
-            else
+            else if (beta > Scalar(M_PI) - beta_tol)
                 {
-                dr0_dtheta = (interp(theta + h, phi, alpha, beta, gamma)
-                              - interp(theta - h, phi, alpha, beta, gamma))
-                             / (Scalar(2) * h);
+                beta = Scalar(M_PI) - beta_tol;
                 }
 
-            // d r0 / d phi
-            if (phi - h < lo[1])
-                {
-                dr0_dphi = (interp(theta, phi + h, alpha, beta, gamma) - r0) / h;
-                }
-            else if (phi + h > hi[1])
+            // move phi away from 0 and pi to avoid 1/sin(phi)
+            // singularity in the Jacobian (uses the same threshold as beta).
+            if (phi < beta_tol)
                 {
-                dr0_dphi = (r0 - interp(theta, phi - h, alpha, beta, gamma)) / h;
+                phi = beta_tol;
                 }
-            else
+            else if (phi > Scalar(M_PI) - beta_tol)
                 {
-                dr0_dphi = (interp(theta, phi + h, alpha, beta, gamma)
-                            - interp(theta, phi - h, alpha, beta, gamma))
-                           / (Scalar(2) * h);
+                phi = Scalar(M_PI) - beta_tol;
                 }
 
-            // d r0 / d alpha
-            if (alpha - h < lo[2])
-                {
-                dr0_dalpha = (interp(theta, phi, alpha + h, beta, gamma) - r0) / h;
-                }
-            else if (alpha + h > hi[2])
-                {
-                dr0_dalpha = (r0 - interp(theta, phi, alpha - h, beta, gamma)) / h;
-                }
-            else
+            // compute r0 and its numerical derivatives.
+            const Scalar r0 = interp(theta, phi, alpha, beta, gamma);
+            const Scalar dr0_dtheta = interp.derivative(theta, phi, alpha, beta, gamma, 0, fd_step);
+            const Scalar dr0_dphi = interp.derivative(theta, phi, alpha, beta, gamma, 1, fd_step);
+            const Scalar dr0_dalpha = interp.derivative(theta, phi, alpha, beta, gamma, 2, fd_step);
+            const Scalar dr0_dbeta = interp.derivative(theta, phi, alpha, beta, gamma, 3, fd_step);
+            const Scalar dr0_dgamma = interp.derivative(theta, phi, alpha, beta, gamma, 4, fd_step);
+
+            // compute rho and apply domain checks.
+            // rho = (1/r - 1/r0) / (1/(r0 + r_cut) - 1/r0)
+            // rho > 1  =>  beyond the surface cutoff; skip.
+            // rho < 0  =>  overlap; shift to 0.
+            const Scalar inv_r = Scalar(1) / r;
+            const Scalar inv_r0 = Scalar(1) / r0;
+            const Scalar inv_r0_rcut = Scalar(1) / (r0 + m_r_cut);
+            const Scalar rho_denom = inv_r0_rcut - inv_r0;
+            const Scalar rho_num = inv_r - inv_r0;
+            Scalar rho = rho_num / rho_denom;
+
+            if (rho > Scalar(1))
                 {
-                dr0_dalpha = (interp(theta, phi, alpha + h, beta, gamma)
-                              - interp(theta, phi, alpha - h, beta, gamma))
-                             / (Scalar(2) * h);
+                continue;
                 }
 
-            // d r0 / d beta
-            if (beta - h < lo[3])
+            if (rho < Scalar(0))
                 {
-                dr0_dbeta = (interp(theta, phi, alpha, beta + h, gamma) - r0) / h;
-                }
-            else if (beta + h > hi[3])
-                {
-                dr0_dbeta = (r0 - interp(theta, phi, alpha, beta - h, gamma)) / h;
-                }
-            else
-                {
-                dr0_dbeta = (interp(theta, phi, alpha, beta + h, gamma)
-                             - interp(theta, phi, alpha, beta - h, gamma))
-                            / (Scalar(2) * h);
+                rho = Scalar(0);
                 }
 
-            // d r0 / d gamma
-            if (gamma - h < lo[4])
-                {
-                dr0_dgamma = (interp(theta, phi, alpha, beta, gamma + h) - r0) / h;
-                }
-            else if (gamma + h > hi[4])
+            // derivatives of rho with respect to r and r0, needed by
+            // the Jacobian.
+            const Scalar inv_r_sq = inv_r * inv_r;
+            const Scalar inv_r0_sq = inv_r0 * inv_r0;
+            const Scalar inv_r0_rcut_sq = inv_r0_rcut * inv_r0_rcut;
+            const Scalar rho_denom_sq = rho_denom * rho_denom;
+
+            const Scalar drho_dr = -inv_r_sq / rho_denom;
+            const Scalar drho_dr0
+                = (inv_r0_sq * rho_denom - rho_num * (inv_r0_sq - inv_r0_rcut_sq)) / rho_denom_sq;
+
+            // Chebyshev evaluation: scale each coordinate to [-1,1]
+            // and evaluate polynomials + derivatives up to max degree.
+            evaluateChebyshev(scaleToChebDomain(rho, Scalar(0), Scalar(1)),
+                              max_deg[0],
+                              cheb_T[0].data(),
+                              cheb_dT[0].data());
+
+            const Scalar ang_coords[5] = {theta, phi, alpha, beta, gamma};
+            for (unsigned int c = 0; c < 5; ++c)
                 {
-                dr0_dgamma = (r0 - interp(theta, phi, alpha, beta, gamma - h)) / h;
+                evaluateChebyshev(
+                    scaleToChebDomain(ang_coords[c], h_domain.data[c].x, h_domain.data[c].y),
+                    max_deg[c + 1],
+                    cheb_T[c + 1].data(),
+                    cheb_dT[c + 1].data());
                 }
-            else
+
+            // evaluate u and du/d(coord_k).
+            Scalar u = Scalar(0);
+            Scalar du[6] = {Scalar(0), Scalar(0), Scalar(0), Scalar(0), Scalar(0), Scalar(0)};
+
+            for (unsigned int t = 0; t < m_Nterms; ++t)
                 {
-                dr0_dgamma = (interp(theta, phi, alpha, beta, gamma + h)
-                              - interp(theta, phi, alpha, beta, gamma - h))
-                             / (Scalar(2) * h);
-                }
+                const unsigned int* degs = &h_terms.data[t * 6];
+                const Scalar coeff = h_coeffs.data[t];
+
+                Scalar T_vals[6];
+                Scalar dT_vals[6];
+                for (unsigned int c = 0; c < 6; ++c)
+                    {
+                    T_vals[c] = cheb_T[c][degs[c]];
+                    dT_vals[c] = cheb_dT[c][degs[c]];
+                    }
+
+                Scalar prefix[7];
+                prefix[0] = Scalar(1);
+                for (unsigned int c = 0; c < 6; ++c)
+                    {
+                    prefix[c + 1] = prefix[c] * T_vals[c];
+                    }
+
+                Scalar suffix[7];
+                suffix[6] = Scalar(1);
+                for (int c = 5; c >= 0; --c)
+                    {
+                    suffix[c] = suffix[c + 1] * T_vals[c];
+                    }
+
+                u += coeff * prefix[6];
 
-            // compute J
+                for (unsigned int c = 0; c < 6; ++c)
+                    {
+                    du[c] += coeff * dT_vals[c] * cheb_scale[c] * prefix[c] * suffix[c + 1];
+                    }
+                }
+            // Jacobian matrix J (6x6).
+            // J maps the potential-derivative vector
+            //   [du/drho, du/dtheta, du/dphi, du/dalpha, du/dbeta, du/dgamma]
+            // to the lab-frame force and torque:
+            //   [F_x, F_y, F_z, tau_x, tau_y, tau_z]
+            const Scalar c_th = std::cos(theta), s_th = std::sin(theta);
+            const Scalar c_ph = std::cos(phi), s_ph = std::sin(phi);
+            const Scalar c_b = std::cos(beta), s_b = std::sin(beta);
+            const Scalar c_a = std::cos(alpha), s_a = std::sin(alpha);
+
+            const Scalar inv_r_s_ph = inv_r / s_ph;
+            const Scalar inv_s_b = Scalar(1) / s_b;
+
+            // common products involving drho_dr0 and r0 derivatives.
+            const Scalar A = drho_dr0 * dr0_dtheta * inv_r_s_ph;
+            const Scalar B = drho_dr0 * dr0_dphi * inv_r;
+            const Scalar C = drho_dr0 * dr0_dalpha * inv_s_b;
+            const Scalar D = drho_dr0 * dr0_dgamma * inv_s_b;
+
+            // force
+            const Scalar f_x = (-c_th * s_ph * drho_dr + s_th * A - c_th * c_ph * B) * du[0]
+                               + (s_th * inv_r_s_ph) * du[1] + (-c_th * c_ph * inv_r) * du[2];
+
+            const Scalar f_y = (-s_th * s_ph * drho_dr - c_th * A - s_th * c_ph * B) * du[0]
+                               + (-c_th * inv_r_s_ph) * du[1] + (-s_th * c_ph * inv_r) * du[2];
+
+            const Scalar f_z = (-c_ph * drho_dr + s_ph * B) * du[0] + (s_ph * inv_r) * du[2];
+
+            // torque
+            const Scalar tau_x = (c_b * s_a * C - c_a * drho_dr0 * dr0_dbeta - s_a * D) * du[0]
+                                 + (c_b * s_a * inv_s_b) * du[3] + (-c_a) * du[4]
+                                 + (-s_a * inv_s_b) * du[5];
+
+            const Scalar tau_y = (-c_b * c_a * C - s_a * drho_dr0 * dr0_dbeta + c_a * D) * du[0]
+                                 + (-c_a * c_b * inv_s_b) * du[3] + (-s_a) * du[4]
+                                 + (c_a * inv_s_b) * du[5];
+
+            const Scalar tau_z = (-drho_dr0 * dr0_dalpha) * du[0] + (-Scalar(1)) * du[3];
+
+            // accumulate
+            fi.x += f_x;
+            fi.y += f_y;
+            fi.z += f_z;
+
+            ti.x += tau_x;
+            ti.y += tau_y;
+            ti.z += tau_z;
+
+            pei += u;
             }
 
         h_force.data[i].x += fi.x;
         h_force.data[i].y += fi.y;
         h_force.data[i].z += fi.z;
-        h_force.data[i].w += Scalar(0.0);
+        h_force.data[i].w += pei;
 
         h_torque.data[i].x += ti.x;
         h_torque.data[i].y += ti.y;
@@ -404,7 +569,7 @@ void export_ChebyshevAnisotropicPairPotential(pybind11::module& m)
                     throw std::runtime_error("r0_data must be a 5D array.");
                     }
 
-                // Infer r0_shape from r0_data.shape
+                // infer r0_shape from r0_data.shape
                 std::array<unsigned int, 5> r0_shape;
                 for (unsigned int k = 0; k < 5; ++k)
                     {
diff --git a/src/ChebyshevAnisotropicPairPotential.h b/src/ChebyshevAnisotropicPairPotential.h
index 7fc549b..eccc7c0 100644
--- a/src/ChebyshevAnisotropicPairPotential.h
+++ b/src/ChebyshevAnisotropicPairPotential.h
@@ -48,6 +48,9 @@ class PYBIND11_EXPORT ChebyshevAnisotropicPairPotential : public ForceCompute
     //! Destructor
     virtual ~ChebyshevAnisotropicPairPotential();
 
+    //! Detach from the neighbor list (called when removing from simulation)
+    virtual void notifyDetach();
+
     // Getters
     std::shared_ptr<hoomd::md::NeighborList> getNeighborList() const
         {
@@ -73,23 +76,32 @@ class PYBIND11_EXPORT ChebyshevAnisotropicPairPotential : public ForceCompute
         }
 
     protected:
-    void computeForces(uint64_t timestep) override;
+    // member variables
 
     std::shared_ptr<hoomd::md::NeighborList> m_nlist; //!< Neighbor list
 
     GPUArray<Scalar2> m_domain; //!< Approximation domain (5x2): 5 rows, each is (min, max)
 
-    Scalar m_r_cut; //!< cut-off distance in approximation domain
+    Scalar m_r_cut; //!< Cut-off distance in approximation domain
 
-    GPUArray<unsigned int> m_terms; //!< Chebyshev term list (Nterms x 6)
+    Scalar m_nlist_r_cut; //!< Effective neighbor-list cutoff = ceil(max(r0_data) + r_cut)
 
-    GPUArray<Scalar> m_coeffs; //!< Coefficients corresponding to each term
+    /// r_cut matrix shared with the neighbor list (subscriber pattern)
+    std::shared_ptr<GPUArray<Scalar>> m_r_cut_nlist;
 
-    unsigned int m_Nterms; //!< Number of terms
+    /// Track whether we have attached to the Simulation object
+    bool m_attached = true;
 
-    GPUArray<Scalar> m_r0_data; //!< R0 data
+    GPUArray<unsigned int> m_terms; //!< Chebyshev term list (Nterms x 6)
+    GPUArray<Scalar> m_coeffs;      //!< Coefficients corresponding to each term
+    unsigned int m_Nterms;          //!< Number of terms
 
-    GPUArray<unsigned int> m_r0_shape; //!< Number of points used along each dimension to sample r0
+    GPUArray<Scalar> m_r0_data;        //!< R0 data
+    GPUArray<unsigned int> m_r0_shape; //!< Points per dimension to sample r0
+
+    // methods
+
+    void computeForces(uint64_t timestep) override;
     };
 
     } // end namespace azplugins
diff --git a/src/LinearInterpolator5D.h b/src/LinearInterpolator5D.h
index 7340434..e3bb1d8 100644
--- a/src/LinearInterpolator5D.h
+++ b/src/LinearInterpolator5D.h
@@ -8,6 +8,7 @@
 #include <cassert>
 #include <cmath>
 #include <cstdint>
+#include <utility>
 
 #include "hoomd/HOOMDMath.h"
 
@@ -24,21 +25,18 @@ namespace hoomd
 namespace azplugins
     {
 
-class FiveDimensionalIndex
+class Index5D
     {
     public:
-    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE FiveDimensionalIndex() : m_n {0, 0, 0, 0, 0} { }
+    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE Index5D() : m_n {0, 0, 0, 0, 0} { }
 
-    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE explicit FiveDimensionalIndex(const unsigned int* n)
+    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE explicit Index5D(const unsigned int* n)
         : m_n {n[0], n[1], n[2], n[3], n[4]}
         {
         }
 
-    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE FiveDimensionalIndex(unsigned int n0,
-                                                                    unsigned int n1,
-                                                                    unsigned int n2,
-                                                                    unsigned int n3,
-                                                                    unsigned int n4)
+    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE
+    Index5D(unsigned int n0, unsigned int n1, unsigned int n2, unsigned int n3, unsigned int n4)
         : m_n {n0, n1, n2, n3, n4}
         {
         }
@@ -107,6 +105,25 @@ template<typename T> class LinearInterpolator5D
         assert(m_indexer.size() > 0);
         }
 
+    //! Constructor taking the per-dimension (lo, hi) bounds as a Scalar2 array (x = lo, y = hi).
+    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE LinearInterpolator5D(const T* data,
+                                                                    const unsigned int* n,
+                                                                    const Scalar2* domain_s2)
+        : m_data(data), m_indexer(n)
+        {
+        for (int d = 0; d < 5; ++d)
+            {
+            const unsigned int nd = n[d];
+            assert(nd >= 2); // nd - 1 is the divisor of the spacing computed below
+
+            m_lo[d] = domain_s2[d].x; // lower bound along dimension d
+            m_hi[d] = domain_s2[d].y; // upper bound along dimension d
+            m_dx[d] = (m_hi[d] - m_lo[d]) / Scalar(nd - 1); // spacing between adjacent points
+            }
+
+        assert(m_indexer.size() > 0);
+        }
+
     //! Interpolate at (x0, x1, x2, x3, x4).
     AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE Scalar
     operator()(Scalar x0, Scalar x1, Scalar x2, Scalar x3, Scalar x4) const
@@ -122,7 +139,7 @@ template<typename T> class LinearInterpolator5D
             const unsigned int nd = m_indexer.getN(static_cast<unsigned int>(d));
             const Scalar f = (x[d] - m_lo[d]) / m_dx[d];
 
-            int b = static_cast<int>(std::floor(static_cast<double>(f)));
+            int b = static_cast<int>(std::floor(f));
 
             // If exactly at the top boundary, shift into the last valid cell so
             // that (b+1) remains in bounds.
@@ -139,8 +156,7 @@ template<typename T> class LinearInterpolator5D
             }
 
         // Load the 2^5=32 corners of the surrounding 5D cell.
-        Scalar c0[32];
-        Scalar c[32];
+        Scalar corners[32];
 
         for (unsigned int mask = 0; mask < 32; ++mask)
             {
@@ -156,12 +172,13 @@ template<typename T> class LinearInterpolator5D
                 = static_cast<unsigned int>(bin[4] + static_cast<int>((mask >> 4) & 1u));
 
             // Implicit conversion from T to Scalar is intended.
-            c0[mask] = m_data[m_indexer(i0, i1, i2, i3, i4)];
+            corners[mask] = m_data[m_indexer(i0, i1, i2, i3, i4)];
             }
 
         // For each dimension d, collapse pairs of points that differ in bit d.
-        Scalar* in = c0;
-        Scalar* out = c;
+        Scalar scratch[16];
+        Scalar* in = corners;
+        Scalar* out = scratch;
         unsigned int len = 32;
 
         for (int d = 0; d < 5; ++d)
@@ -175,9 +192,7 @@ template<typename T> class LinearInterpolator5D
                 out[i] = in[2 * i] * omt + in[2 * i + 1] * t;
                 }
             // Swap input/output
-            Scalar* tmp = in;
-            in = out;
-            out = tmp;
+            std::swap(in, out);
             len = out_len;
             }
 
@@ -185,12 +200,70 @@ template<typename T> class LinearInterpolator5D
         return in[0];
         }
 
+    //! Compute the finite-difference derivative of the interpolant along one dimension.
+    /*! Uses a central difference when x[dim] - h and x[dim] + h both lie inside
+        [m_lo[dim], m_hi[dim]]; otherwise a forward (near lo) or backward (near hi) one.
+        If both neighbors fall outside the domain, the forward difference is used.
+        \param x0  Coordinate along dimension 0
+        \param x1  Coordinate along dimension 1
+        \param x2  Coordinate along dimension 2
+        \param x3  Coordinate along dimension 3
+        \param x4  Coordinate along dimension 4
+        \param dim Which dimension (0-4) to differentiate with respect to
+        \param h   Finite-difference step size (must be positive)
+    */
+    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE Scalar
+    derivative(Scalar x0, Scalar x1, Scalar x2, Scalar x3, Scalar x4, int dim, Scalar h) const
+        {
+        assert(dim >= 0 && dim < 5); // dim indexes the 5-element arrays below
+        assert(h > Scalar(0));       // h is a divisor and sets the boundary tests
+        Scalar x[5] = {x0, x1, x2, x3, x4};
+        const bool at_lo = (x[dim] - h < m_lo[dim]);
+        const bool at_hi = (x[dim] + h > m_hi[dim]);
+
+        if (!at_lo && !at_hi)
+            {
+            // central difference: the value at the point itself is not needed
+            x[dim] += h;
+            const Scalar f_plus = (*this)(x[0], x[1], x[2], x[3], x[4]);
+            x[dim] -= Scalar(2) * h;
+            const Scalar f_minus = (*this)(x[0], x[1], x[2], x[3], x[4]);
+            return (f_plus - f_minus) / (Scalar(2) * h);
+            }
+
+        // one-sided difference: only this path needs the value at the original point
+        const Scalar val = (*this)(x0, x1, x2, x3, x4);
+        if (at_lo)
+            {
+            // forward difference
+            x[dim] += h;
+            const Scalar f_plus = (*this)(x[0], x[1], x[2], x[3], x[4]);
+            return (f_plus - val) / h;
+            }
+        // backward difference
+        x[dim] -= h;
+        const Scalar f_minus = (*this)(x[0], x[1], x[2], x[3], x[4]);
+        return (val - f_minus) / h;
+        }
+
+    //! Return the lower domain bound m_lo[dim]; dim must be in 0-4 (not checked here).
+    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE Scalar getLo(int dim) const
+        {
+        return m_lo[dim];
+        }
+
+    //! Return the upper domain bound m_hi[dim]; dim must be in 0-4 (not checked here).
+    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE Scalar getHi(int dim) const
+        {
+        return m_hi[dim];
+        }
+
     private:
     const T* m_data;
     Scalar m_lo[5];
     Scalar m_hi[5];
     Scalar m_dx[5];
-    FiveDimensionalIndex m_indexer;
+    Index5D m_indexer;
     };
 
     } // namespace azplugins
diff --git a/src/pytest/test_chebyshev.py b/src/pytest/test_chebyshev.py
index 30a3f51..e95f7b0 100644
--- a/src/pytest/test_chebyshev.py
+++ b/src/pytest/test_chebyshev.py
@@ -5,13 +5,14 @@
 import numpy
 import hoomd
 import hoomd.azplugins
+from scipy.interpolate import RegularGridInterpolator
+from scipy.spatial.transform import Rotation
 
 
 def test_chebyshev_construct_attach_zero(
     simulation_factory, two_particle_snapshot_factory
 ):
     """Construct, attach, and check force/torque output."""
-
     snap = two_particle_snapshot_factory()
     if snap.communicator.rank == 0:
         snap.particles.position[:] = [[-0.5, 0.0, 0.0], [0.5, 0.0, 0.0]]
@@ -28,11 +29,11 @@ def test_chebyshev_construct_attach_zero(
 
     domain = numpy.asarray(
         [
-            [0.0, 2.0 * numpy.pi],  # theta
-            [0.0, numpy.pi],  # phi
-            [0.0, 2.0 * numpy.pi],  # alpha
-            [0.0, numpy.pi],  # beta
-            [0.0, 2.0 * numpy.pi],  # gamma
+            [0.0, 2.0 * numpy.pi],
+            [0.0, numpy.pi],
+            [0.0, 2.0 * numpy.pi],
+            [0.0, numpy.pi],
+            [0.0, 2.0 * numpy.pi],
         ],
         dtype=numpy.float64,
     )
@@ -45,7 +46,7 @@ def test_chebyshev_construct_attach_zero(
         dtype=numpy.uint32,
     )
 
-    coeffs = numpy.asarray([1.0, -0.25], dtype=numpy.float64)
+    coeffs = numpy.asarray([0.0, 0.0], dtype=numpy.float64)
 
     # r0 must be 5D (and each dimension >= 2)
     r0 = (numpy.arange(32, dtype=numpy.float64).reshape((2, 2, 2, 2, 2))) * 0.01
@@ -79,3 +80,214 @@ def test_chebyshev_construct_attach_zero(
         numpy.testing.assert_array_equal(pot.forces, numpy.zeros((2, 3)))
         numpy.testing.assert_array_equal(pot.torques, numpy.zeros((2, 3)))
         numpy.testing.assert_array_equal(pot.energies, numpy.zeros((2,)))
+
+
+def test_chebyshev_force_torque_energy_no_symmetry(
+    simulation_factory, two_particle_snapshot_factory
+):
+    """Test energy, force, and torque, without considering symmetry."""
+    rc = 3.0
+    phi_min = 1e-5
+    beta_min = 1e-5
+
+    domain = numpy.array(
+        [
+            [0.0, 2.0 * numpy.pi],
+            [phi_min, numpy.pi - phi_min],
+            [0.0, 2.0 * numpy.pi],
+            [beta_min, numpy.pi - phi_min],
+            [0.0, 2.0 * numpy.pi],
+        ],
+        dtype=numpy.float64,
+    )
+
+    terms = numpy.array(
+        [
+            [0, 0, 0, 0, 0, 0],
+            [0, 0, 0, 1, 0, 0],
+            [1, 0, 0, 0, 0, 0],
+            [1, 0, 0, 1, 0, 0],
+        ],
+        dtype=numpy.uint32,
+    )
+    coeffs = numpy.array([1.0, 0.25, 1.5, -1.0], dtype=numpy.float64)
+
+    # r0 data: shape (3, 2, 3, 2, 3) = 108 values.
+    r0_data = numpy.array([1, 2.1, 3.2] * 36, dtype=numpy.float64).reshape(
+        3, 2, 3, 2, 3
+    )
+
+    theta_grid = numpy.linspace(0, 2 * numpy.pi, 3)
+    phi_grid = numpy.linspace(phi_min, numpy.pi - phi_min, 2)
+    alpha_grid = numpy.linspace(0, 2 * numpy.pi, 3)
+    beta_grid = numpy.linspace(beta_min, numpy.pi - phi_min, 2)
+    gamma_grid = numpy.linspace(0, 2 * numpy.pi, 3)
+
+    r0_interp = RegularGridInterpolator(
+        (theta_grid, phi_grid, alpha_grid, beta_grid, gamma_grid),
+        r0_data,
+        method="linear",
+        bounds_error=False,
+        fill_value=numpy.nan,
+    )
+
+    def rho_to_r(rho, r0, rc):
+        """Invert  rho = (1/r - 1/r0) / (1/(r0+rc) - 1/r0)  to obtain r."""
+        inv_r0 = 1.0 / r0
+        inv_r0_rc = 1.0 / (r0 + rc)
+        inv_r = rho * (inv_r0_rc - inv_r0) + inv_r0
+        return 1.0 / inv_r
+
+    def run_pair(rho, theta, phi, alpha, beta, gamma):
+        """Build a two-particle simulation, run for one step, and return
+        the potential object."""
+        snap = two_particle_snapshot_factory()
+        if snap.communicator.rank == 0:
+            r0 = float(r0_interp(numpy.array([theta, phi, alpha, beta, gamma]))[0])
+            r = rho_to_r(rho, r0, rc)
+
+            dx = r * numpy.sin(phi) * numpy.cos(theta)
+            dy = r * numpy.sin(phi) * numpy.sin(theta)
+            dz = r * numpy.cos(phi)
+
+            rot = Rotation.from_euler("ZXZ", [alpha, beta, gamma])
+            q_j = rot.as_quat(scalar_first=True)
+
+            snap.particles.position[0] = [0.0, 0.0, 0.0]
+            snap.particles.position[1] = [-dx, -dy, -dz]
+            snap.particles.orientation[0] = [1, 0, 0, 0]
+            snap.particles.orientation[1] = q_j
+            snap.particles.moment_inertia[:] = [0.1, 0.1, 0.1]
+
+        sim = simulation_factory(snap)
+
+        integrator = hoomd.md.Integrator(dt=0.001)
+        nve = hoomd.md.methods.ConstantVolume(hoomd.filter.All())
+        integrator.methods = [nve]
+
+        nlist = hoomd.md.nlist.Cell(buffer=1)
+        pot = hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential(
+            nlist=nlist,
+            domain=domain,
+            terms=terms,
+            coeffs=coeffs,
+            r0=r0_data,
+            r_cut=rc,
+        )
+
+        integrator.forces = [pot]
+        sim.operations.integrator = integrator
+        sim.run(0)
+        return sim, pot
+
+    def check(sim, pot, expected_energy, expected_force, expected_torque):
+        """Compare the output on particle 0 to the Python reference (smolyay)."""
+        if sim.device.communicator.rank == 0:
+            numpy.testing.assert_allclose(
+                pot.energies[0], expected_energy, atol=1e-3, rtol=1e-3
+            )
+            numpy.testing.assert_allclose(
+                pot.forces[0], expected_force, atol=1e-3, rtol=1e-3
+            )
+            numpy.testing.assert_allclose(
+                pot.torques[0], expected_torque, atol=1e-3, rtol=1e-3
+            )
+
+    # point 1: interior
+    sim, pot = run_pair(
+        rho=0.2,
+        theta=numpy.pi / 4,
+        phi=numpy.pi / 4,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 2,
+        gamma=numpy.pi,
+    )
+    check(
+        sim,
+        pot,
+        expected_energy=-0.41,
+        expected_force=numpy.array([-1.324, -1.324, -1.872]),
+        expected_torque=numpy.array([0.944, -0.307, -0.271]),
+    )
+
+    # point 2: rho < 0
+    sim, pot = run_pair(
+        rho=-0.1,
+        theta=numpy.pi / 4,
+        phi=numpy.pi / 4,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 2,
+        gamma=numpy.pi,
+    )
+    check(
+        sim,
+        pot,
+        expected_energy=-1.25,
+        expected_force=numpy.array([-1.906, -1.906, -2.695]),
+        expected_torque=numpy.array([1.226, -0.398, -0.398]),
+    )
+
+    # point 3: rho < 0 and phi at upper boundary
+    sim, pot = run_pair(
+        rho=-0.1,
+        theta=numpy.pi / 4,
+        phi=numpy.pi - phi_min,
+        alpha=2 * numpy.pi / 15,
+        beta=numpy.pi / 2,
+        gamma=numpy.pi,
+    )
+    check(
+        sim,
+        pot,
+        expected_energy=-1.583,
+        expected_force=numpy.array([0.0, 0.0, 4.296]),
+        expected_torque=numpy.array([0.591, -1.327, -0.398]),
+    )
+
+    # point 4: beta at lower boundary
+    sim, pot = run_pair(
+        rho=0.2,
+        theta=numpy.pi / 4,
+        phi=numpy.pi / 4,
+        alpha=2 * numpy.pi / 5,
+        beta=beta_min,
+        gamma=numpy.pi,
+    )
+    check(
+        sim,
+        pot,
+        expected_energy=-0.41,
+        expected_force=numpy.array([-1.324, -1.324, -1.872]),
+        expected_torque=numpy.array([120148.0, -39038.6, -0.271]),
+    )
+
+    # point 5: interior with rho near 1
+    sim, pot = run_pair(
+        rho=0.95,
+        theta=numpy.pi / 4,
+        phi=numpy.pi / 6,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 2,
+        gamma=numpy.pi / 8,
+    )
+    check(
+        sim,
+        pot,
+        expected_energy=2.74,
+        expected_force=numpy.array([-0.174, -0.174, -0.427]),
+        expected_torque=numpy.array([0.207, -0.067, 0.207]),
+    )
+
+    # point 6: rho > 1, pair is beyond the surface cutoff
+    sim, pot = run_pair(
+        rho=1.05,
+        theta=numpy.pi / 4,
+        phi=numpy.pi / 6,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 2,
+        gamma=numpy.pi / 8,
+    )
+    if sim.device.communicator.rank == 0:
+        numpy.testing.assert_allclose(pot.energies[0], 0.0, atol=1e-10)
+        numpy.testing.assert_allclose(pot.forces[0], [0.0, 0.0, 0.0], atol=1e-10)
+        numpy.testing.assert_allclose(pot.torques[0], [0.0, 0.0, 0.0], atol=1e-10)

From c1fc34a771fddf7790bf927cbe2696c18c6512e0 Mon Sep 17 00:00:00 2001
From: Mohammadreza <mzf0069@auburn.edu>
Date: Fri, 10 Apr 2026 14:01:27 -0500
Subject: [PATCH 07/13] Update linear interpolant

---
 src/LinearInterpolator5D.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/LinearInterpolator5D.h b/src/LinearInterpolator5D.h
index e3bb1d8..9bffbe8 100644
--- a/src/LinearInterpolator5D.h
+++ b/src/LinearInterpolator5D.h
@@ -8,7 +8,6 @@
 #include <cassert>
 #include <cmath>
 #include <cstdint>
-#include <utility>
 
 #include "hoomd/HOOMDMath.h"
 

From e95b0dd68645535584c817e6861c4cd5364c4cbd Mon Sep 17 00:00:00 2001
From: Mohammadreza <mzf0069@auburn.edu>
Date: Thu, 16 Apr 2026 19:13:10 -0500
Subject: [PATCH 08/13] Update force and torque calculations and add symmetry
 evaluator.

---
 src/ChebyshevAnisotropicPairPotential.cc | 146 ++++++++-------
 src/LinearInterpolator5D.h               |  73 ++++++--
 src/ShapeSymmetry.h                      | 226 +++++++++++++++++++++++
 src/pytest/test_chebyshev.py             |  47 ++---
 4 files changed, 386 insertions(+), 106 deletions(-)
 create mode 100644 src/ShapeSymmetry.h

diff --git a/src/ChebyshevAnisotropicPairPotential.cc b/src/ChebyshevAnisotropicPairPotential.cc
index b0a122e..47ed72e 100644
--- a/src/ChebyshevAnisotropicPairPotential.cc
+++ b/src/ChebyshevAnisotropicPairPotential.cc
@@ -27,11 +27,6 @@ static inline Scalar scaleToChebDomain(Scalar x, Scalar lo, Scalar hi)
     T_0(x) = 1                       T'_0(x) = 0
     T_1(x) = x                       T'_1(x) = 1
     T_{n+1}(x) = 2x T_n - T_{n-1}   T'_{n+1}(x) = 2 T_n + 2x T'_n - T'_{n-1}
-
-    \param x        Evaluation point in [-1, 1]
-    \param max_deg  Highest polynomial degree to compute
-    \param T        Output: T[n] = T_n(x)  for n = 0 .. max_deg  (size >= max_deg+1)
-    \param dT       Output: dT[n] = T'_n(x) for n = 0 .. max_deg (size >= max_deg+1)
 */
 static inline void evaluateChebyshev(Scalar x, unsigned int max_deg, Scalar* T, Scalar* dT)
     {
@@ -157,7 +152,9 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
     // start by updating the neighborlist
     m_nlist->compute(timestep);
 
-    // access neighbor list, particle data, and simulation box.
+    // check neighbor list storage mode
+    const bool third_law = (m_nlist->getStorageMode() == hoomd::md::NeighborList::half);
+    // access neighbor list, particle data, and simulation box
     ArrayHandle<unsigned int> h_n_neigh(m_nlist->getNNeighArray(),
                                         access_location::host,
                                         access_mode::read);
@@ -183,7 +180,7 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
 
     LinearInterpolator5D<Scalar> interp(h_r0_data.data, h_r0_shape.data, h_domain.data);
 
-    // determine the maximum Chebyshev degree needed for each of the 6 coordinates.
+    // determine the maximum Chebyshev degree needed for each of the 6 coordinates
     unsigned int max_deg[6] = {0, 0, 0, 0, 0, 0};
     for (unsigned int t = 0; t < m_Nterms; ++t)
         {
@@ -195,7 +192,7 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
             }
         }
 
-    // chain-rule scale factors: d(x_scaled)/d(x) = 2 / (hi - lo).
+    // chain-rule scale factors: d(x_scaled)/d(x) = 2 / (hi - lo)
     Scalar cheb_scale[6];
     cheb_scale[0] = Scalar(2);
     for (unsigned int d = 0; d < 5; ++d)
@@ -203,16 +200,19 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
         cheb_scale[d + 1] = Scalar(2) / (h_domain.data[d].y - h_domain.data[d].x);
         }
 
-    // pre-allocate Chebyshev evaluation storage.
-    std::vector<Scalar> cheb_T[6];
-    std::vector<Scalar> cheb_dT[6];
+    // flat 1D Chebyshev storage
+    unsigned int max_deg_global = 0;
     for (unsigned int c = 0; c < 6; ++c)
         {
-        cheb_T[c].resize(max_deg[c] + 1);
-        cheb_dT[c].resize(max_deg[c] + 1);
+        if (max_deg[c] > max_deg_global)
+            max_deg_global = max_deg[c];
         }
 
-    // need to start from a zero force and torque
+    const Index2D cheb_idx(max_deg_global + 1, 6);
+    std::vector<Scalar> cheb_T_flat(cheb_idx.getNumElements());
+    std::vector<Scalar> cheb_dT_flat(cheb_idx.getNumElements());
+
+    // zero force and torque
     m_force.zeroFill();
     m_torque.zeroFill();
 
@@ -220,7 +220,6 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
     ArrayHandle<Scalar4> h_torque(m_torque, access_location::host, access_mode::readwrite);
 
     const unsigned int N = m_pdata->getN();
-
     //! Euler-angle singularity tolerance for the alpha/gamma extraction.
     const Scalar euler_singularity_tol = Scalar(1e-7);
 
@@ -252,7 +251,6 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
             Scalar3 dx = pos_i - pos_j;
             // apply periodic boundary conditions
             dx = box.minImage(dx);
-
             // Neighbor-list cutoff check (center-center distance).
             const Scalar rsq = dot(dx, dx);
             if (rsq > nlist_rcutsq)
@@ -300,7 +298,7 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
             phi = std::acos(cosphi);
 
             // Build rotation matrix from the relative quaternion and extract
-            // ZXZ Euler angles (alpha, beta, gamma).
+            // ZXZ Euler angles (alpha, beta, gamma)
             const rotmat3<Scalar> R(q_rel);
 
             Scalar alpha = Scalar(0);
@@ -320,7 +318,7 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
                 beta = std::acos(R.row2.z);
                 }
 
-            if (beta > euler_singularity_tol && beta < Scalar(M_PI) - euler_singularity_tol)
+            if (beta >= euler_singularity_tol && beta <= Scalar(M_PI) - euler_singularity_tol)
                 {
                 alpha = std::atan2(R.row0.z, -R.row1.z);
                 gamma = std::atan2(R.row2.x, R.row2.y);
@@ -341,40 +339,29 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
                 gamma += Scalar(2) * M_PI;
                 }
 
-            // move beta away from 0 and pi to avoid 1/sin(beta)
-            // singularity in the Jacobian.
-            if (beta < beta_tol)
-                {
-                beta = beta_tol;
-                }
-            else if (beta > Scalar(M_PI) - beta_tol)
-                {
-                beta = Scalar(M_PI) - beta_tol;
-                }
-
-            // move phi away from 0 and pi to avoid 1/sin(phi)
+            // move phi and beta away from 0 and pi to avoid 1/sin(beta or phi)
             // singularity in the Jacobian (used the same threshold as beta).
             if (phi < beta_tol)
-                {
                 phi = beta_tol;
-                }
             else if (phi > Scalar(M_PI) - beta_tol)
-                {
                 phi = Scalar(M_PI) - beta_tol;
-                }
 
-            // compute r0 and its numerical derivatives.
-            const Scalar r0 = interp(theta, phi, alpha, beta, gamma);
-            const Scalar dr0_dtheta = interp.derivative(theta, phi, alpha, beta, gamma, 0, fd_step);
-            const Scalar dr0_dphi = interp.derivative(theta, phi, alpha, beta, gamma, 1, fd_step);
-            const Scalar dr0_dalpha = interp.derivative(theta, phi, alpha, beta, gamma, 2, fd_step);
-            const Scalar dr0_dbeta = interp.derivative(theta, phi, alpha, beta, gamma, 3, fd_step);
-            const Scalar dr0_dgamma = interp.derivative(theta, phi, alpha, beta, gamma, 4, fd_step);
-
-            // compute rho and apply domain checks.
-            // rho = (1/r - 1/r0) / (1/(r0 + r_cut) - 1/r0)
-            // rho > 1  =>  beyond the surface cutoff; skip.
-            // rho < 0  =>  overlap; shift to 0.
+            if (beta < beta_tol)
+                beta = beta_tol;
+            else if (beta > Scalar(M_PI) - beta_tol)
+                beta = Scalar(M_PI) - beta_tol;
+
+            // compute r0 and all 5 derivatives
+            Scalar r0;
+            Scalar dr0[5];
+            interp.valueAndDerivatives(theta, phi, alpha, beta, gamma, fd_step, r0, dr0);
+            const Scalar dr0_dtheta = dr0[0];
+            const Scalar dr0_dphi = dr0[1];
+            const Scalar dr0_dalpha = dr0[2];
+            const Scalar dr0_dbeta = dr0[3];
+            const Scalar dr0_dgamma = dr0[4];
+
+            // compute rho
             const Scalar inv_r = Scalar(1) / r;
             const Scalar inv_r0 = Scalar(1) / r0;
             const Scalar inv_r0_rcut = Scalar(1) / (r0 + m_r_cut);
@@ -387,13 +374,14 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
                 continue;
                 }
 
+            // save raw rho for energy extrapolation if rho < 0
+            const Scalar rho_energy = rho;
             if (rho < Scalar(0))
                 {
                 rho = Scalar(0);
                 }
 
-            // derivatives of rho with respect to r and r0, needed by
-            // the Jacobian.
+            // drho/dr and drho/dr0
             const Scalar inv_r_sq = inv_r * inv_r;
             const Scalar inv_r0_sq = inv_r0 * inv_r0;
             const Scalar inv_r0_rcut_sq = inv_r0_rcut * inv_r0_rcut;
@@ -407,8 +395,8 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
             // and evaluate polynomials + derivatives up to max degree.
             evaluateChebyshev(scaleToChebDomain(rho, Scalar(0), Scalar(1)),
                               max_deg[0],
-                              cheb_T[0].data(),
-                              cheb_dT[0].data());
+                              cheb_T_flat.data() + cheb_idx(0, 0),
+                              cheb_dT_flat.data() + cheb_idx(0, 0));
 
             const Scalar ang_coords[5] = {theta, phi, alpha, beta, gamma};
             for (unsigned int c = 0; c < 5; ++c)
@@ -416,25 +404,25 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
                 evaluateChebyshev(
                     scaleToChebDomain(ang_coords[c], h_domain.data[c].x, h_domain.data[c].y),
                     max_deg[c + 1],
-                    cheb_T[c + 1].data(),
-                    cheb_dT[c + 1].data());
+                    cheb_T_flat.data() + cheb_idx(0, c + 1),
+                    cheb_dT_flat.data() + cheb_idx(0, c + 1));
                 }
 
-            // evaluate u and du/d(coord_k).
+            // evaluate u and du/d(coord_k)
             Scalar u = Scalar(0);
             Scalar du[6] = {Scalar(0), Scalar(0), Scalar(0), Scalar(0), Scalar(0), Scalar(0)};
 
             for (unsigned int t = 0; t < m_Nterms; ++t)
                 {
-                const unsigned int* degs = &h_terms.data[t * 6];
+                const unsigned int* degs = h_terms.data + 6 * t;
                 const Scalar coeff = h_coeffs.data[t];
 
                 Scalar T_vals[6];
                 Scalar dT_vals[6];
                 for (unsigned int c = 0; c < 6; ++c)
                     {
-                    T_vals[c] = cheb_T[c][degs[c]];
-                    dT_vals[c] = cheb_dT[c][degs[c]];
+                    T_vals[c] = cheb_T_flat[cheb_idx(degs[c], c)];
+                    dT_vals[c] = cheb_dT_flat[cheb_idx(degs[c], c)];
                     }
 
                 Scalar prefix[7];
@@ -458,26 +446,34 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
                     du[c] += coeff * dT_vals[c] * cheb_scale[c] * prefix[c] * suffix[c + 1];
                     }
                 }
+
+            // linear extrapolation for energy when rho < 0
+            u = (rho_energy < Scalar(0)) ? (u + rho_energy * du[0]) : u;
+
             // Jacobian matrix J (6x6).
             // J maps the potential-derivative vector
-            //   [du/drho, du/dtheta, du/dphi, du/dalpha, du/dbeta, du/dgamma]
+            // [du/drho, du/dtheta, du/dphi, du/dalpha, du/dbeta, du/dgamma]
             // to the lab-frame force and torque:
-            //   [F_x, F_y, F_z, tau_x, tau_y, tau_z]
-            const Scalar c_th = std::cos(theta), s_th = std::sin(theta);
-            const Scalar c_ph = std::cos(phi), s_ph = std::sin(phi);
-            const Scalar c_b = std::cos(beta), s_b = std::sin(beta);
-            const Scalar c_a = std::cos(alpha), s_a = std::sin(alpha);
+            // [F_x, F_y, F_z, tau_x, tau_y, tau_z]
+            Scalar s_th, c_th;
+            fast::sincos(theta, s_th, c_th);
+            Scalar s_ph, c_ph;
+            fast::sincos(phi, s_ph, c_ph);
+            Scalar s_b, c_b;
+            fast::sincos(beta, s_b, c_b);
+            Scalar s_a, c_a;
+            fast::sincos(alpha, s_a, c_a);
 
             const Scalar inv_r_s_ph = inv_r / s_ph;
             const Scalar inv_s_b = Scalar(1) / s_b;
 
-            // common products involving drho_dr0 and r0 derivatives.
+            // common products involving drho_dr0 and r0 derivatives
             const Scalar A = drho_dr0 * dr0_dtheta * inv_r_s_ph;
             const Scalar B = drho_dr0 * dr0_dphi * inv_r;
             const Scalar C = drho_dr0 * dr0_dalpha * inv_s_b;
             const Scalar D = drho_dr0 * dr0_dgamma * inv_s_b;
 
-            // force
+            // force (lab frame)
             const Scalar f_x = (-c_th * s_ph * drho_dr + s_th * A - c_th * c_ph * B) * du[0]
                                + (s_th * inv_r_s_ph) * du[1] + (-c_th * c_ph * inv_r) * du[2];
 
@@ -486,7 +482,7 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
 
             const Scalar f_z = (-c_ph * drho_dr + s_ph * B) * du[0] + (s_ph * inv_r) * du[2];
 
-            // torque
+            // torque (lab frame)
             const Scalar tau_x = (c_b * s_a * C - c_a * drho_dr0 * dr0_dbeta - s_a * D) * du[0]
                                  + (c_b * s_a * inv_s_b) * du[3] + (-c_a) * du[4]
                                  + (-s_a * inv_s_b) * du[5];
@@ -507,12 +503,25 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
             ti.z += tau_z;
 
             pei += u;
+
+            // Newton's third law for half neighbor list
+            if (third_law)
+                {
+                h_force.data[j].x -= f_x;
+                h_force.data[j].y -= f_y;
+                h_force.data[j].z -= f_z;
+                h_force.data[j].w += Scalar(0.5) * u;
+
+                h_torque.data[j].x -= tau_x;
+                h_torque.data[j].y -= tau_y;
+                h_torque.data[j].z -= tau_z;
+                }
             }
 
         h_force.data[i].x += fi.x;
         h_force.data[i].y += fi.y;
         h_force.data[i].z += fi.z;
-        h_force.data[i].w += pei;
+        h_force.data[i].w += Scalar(0.5) * pei;
 
         h_torque.data[i].x += ti.x;
         h_torque.data[i].y += ti.y;
@@ -543,13 +552,11 @@ void export_ChebyshevAnisotropicPairPotential(pybind11::module& m)
                py::array_t<Scalar, py::array::c_style | py::array::forcecast> coeffs,
                py::array_t<Scalar, py::array::c_style | py::array::forcecast> r0_data)
             {
-                // domain must be (5,2) - rho is always in (0, 1)
                 if (domain.ndim() != 2 || domain.shape(0) != 5 || domain.shape(1) != 2)
                     {
                     throw std::runtime_error("domain must have shape (5,2).");
                     }
 
-                // terms must be (Nterms,6)
                 if (terms.ndim() != 2 || terms.shape(1) != 6)
                     {
                     throw std::runtime_error("terms must have shape (Nterms,6).");
@@ -557,19 +564,16 @@ void export_ChebyshevAnisotropicPairPotential(pybind11::module& m)
 
                 const unsigned int Nterms = static_cast<unsigned int>(terms.shape(0));
 
-                // coeffs must be (Nterms,)
                 if (coeffs.ndim() != 1 || static_cast<unsigned int>(coeffs.shape(0)) != Nterms)
                     {
                     throw std::runtime_error("coeffs must have shape (Nterms,).");
                     }
 
-                // r0_data must be 5D
                 if (r0_data.ndim() != 5)
                     {
                     throw std::runtime_error("r0_data must be a 5D array.");
                     }
 
-                // infer r0_shape from r0_data.shape
                 std::array<unsigned int, 5> r0_shape;
                 for (unsigned int k = 0; k < 5; ++k)
                     {
diff --git a/src/LinearInterpolator5D.h b/src/LinearInterpolator5D.h
index 9bffbe8..b1a9c85 100644
--- a/src/LinearInterpolator5D.h
+++ b/src/LinearInterpolator5D.h
@@ -199,18 +199,7 @@ template<typename T> class LinearInterpolator5D
         return in[0];
         }
 
-    //! Compute the finite-difference derivative with respect to dimensions.
-    /*! Uses central differences when possible, falling back to forward or backward
-        differences at the domain boundaries.
-
-        \param x0  Coordinate along dimension 0
-        \param x1  Coordinate along dimension 1
-        \param x2  Coordinate along dimension 2
-        \param x3  Coordinate along dimension 3
-        \param x4  Coordinate along dimension 4
-        \param dim Which dimension (0-4) to differentiate with respect to
-        \param h   Finite-difference step size
-    */
+    //! Compute the finite-difference derivative with respect to a single dimension.
     AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE Scalar
     derivative(Scalar x0, Scalar x1, Scalar x2, Scalar x3, Scalar x4, int dim, Scalar h) const
         {
@@ -222,7 +211,6 @@ template<typename T> class LinearInterpolator5D
 
         if (!at_lo && !at_hi)
             {
-            // central difference
             x[dim] += h;
             const Scalar f_plus = (*this)(x[0], x[1], x[2], x[3], x[4]);
             x[dim] -= Scalar(2) * h;
@@ -245,12 +233,69 @@ template<typename T> class LinearInterpolator5D
             }
         }
 
+    //! Compute the interpolated value and all 5 partial derivatives in one call.
+    /*! Computes a finite-difference derivative along each of the five dimensions.
+        Uses central differences when possible, falling back to forward or backward
+        differences at the domain boundaries.
+
+        \param x0    Coordinate along dimension 0
+        \param x1    Coordinate along dimension 1
+        \param x2    Coordinate along dimension 2
+        \param x3    Coordinate along dimension 3
+        \param x4    Coordinate along dimension 4
+        \param h     Finite-difference step size
+        \param value Interpolated value at (x0..x4)
+        \param deriv Array of 5 partial derivatives (one per dimension)
+    */
+    AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE void valueAndDerivatives(Scalar x0,
+                                                                        Scalar x1,
+                                                                        Scalar x2,
+                                                                        Scalar x3,
+                                                                        Scalar x4,
+                                                                        Scalar h,
+                                                                        Scalar& value,
+                                                                        Scalar* deriv) const
+        {
+        Scalar x[5] = {x0, x1, x2, x3, x4};
+        value = (*this)(x[0], x[1], x[2], x[3], x[4]);
+
+        for (int dim = 0; dim < 5; ++dim)
+            {
+            const Scalar x_orig = x[dim];
+            const bool at_lo = (x_orig - h < m_lo[dim]);
+            const bool at_hi = (x_orig + h > m_hi[dim]);
+
+            if (!at_lo && !at_hi)
+                {
+                // central difference
+                x[dim] = x_orig + h;
+                const Scalar f_plus = (*this)(x[0], x[1], x[2], x[3], x[4]);
+                x[dim] = x_orig - h;
+                const Scalar f_minus = (*this)(x[0], x[1], x[2], x[3], x[4]);
+                deriv[dim] = (f_plus - f_minus) / (Scalar(2) * h);
+                }
+            else if (at_lo)
+                {
+                // forward difference
+                x[dim] = x_orig + h;
+                const Scalar f_plus = (*this)(x[0], x[1], x[2], x[3], x[4]);
+                deriv[dim] = (f_plus - value) / h;
+                }
+            else
+                {
+                // backward difference
+                x[dim] = x_orig - h;
+                const Scalar f_minus = (*this)(x[0], x[1], x[2], x[3], x[4]);
+                deriv[dim] = (value - f_minus) / h;
+                }
+            x[dim] = x_orig;
+            }
+        }
     //! Return the lower bound for a given dimension.
     AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE Scalar getLo(int dim) const
         {
         return m_lo[dim];
         }
-
     //! Return the upper bound for a given dimension.
     AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE Scalar getHi(int dim) const
         {
diff --git a/src/ShapeSymmetry.h b/src/ShapeSymmetry.h
new file mode 100644
index 0000000..81277e2
--- /dev/null
+++ b/src/ShapeSymmetry.h
@@ -0,0 +1,226 @@
+// Copyright (c) 2018-2020, Michael P. Howard
+// Copyright (c) 2021-2025, Auburn University
+// Part of azplugins, released under the BSD 3-Clause License.
+
+/*!
+ * \file ShapeSymmetry.h
+ * \brief Symmetry evaluators that reduce angular coordinates to a fundamental domain.
+ */
+
+#ifndef AZPLUGINS_SHAPE_SYMMETRY_H_
+#define AZPLUGINS_SHAPE_SYMMETRY_H_
+
+#include "hoomd/HOOMDMath.h"
+#include "hoomd/VectorMath.h"
+
+#include <cmath>
+
+#ifndef __HIPCC__
+#include <string>
+#endif
+
+#if defined(__HIPCC__) || defined(__CUDACC__)
+#define AZPLUGINS_HOSTDEVICE __host__ __device__
+#define AZPLUGINS_FORCEINLINE __forceinline__
+#else
+#define AZPLUGINS_HOSTDEVICE
+#define AZPLUGINS_FORCEINLINE inline
+#endif
+
+namespace hoomd
+    {
+namespace azplugins
+    {
+namespace detail
+    {
+
+//! Convert spherical coordinates to Cartesian coordinates.
+AZPLUGINS_HOSTDEVICE inline vec3<Scalar> sphericalToCartesian(Scalar r, Scalar theta, Scalar phi)
+    {
+    Scalar s_th, c_th, s_ph, c_ph;
+    fast::sincos(theta, s_th, c_th);
+    fast::sincos(phi, s_ph, c_ph);
+    return vec3<Scalar>(r * s_ph * c_th, r * s_ph * s_th, r * c_ph);
+    }
+
+//! Convert Cartesian coordinates to spherical coordinates.
+AZPLUGINS_HOSTDEVICE inline void
+cartesianToSpherical(const vec3<Scalar>& v, Scalar& r, Scalar& theta, Scalar& phi)
+    {
+    r = fast::sqrt(dot(v, v));
+    if (r > Scalar(0))
+        {
+        theta = std::atan2(v.y, v.x);
+        if (theta < Scalar(0))
+            theta += Scalar(2) * Scalar(M_PI);
+        Scalar cp = v.z / r;
+        if (cp < Scalar(-1))
+            cp = Scalar(-1);
+        else if (cp > Scalar(1))
+            cp = Scalar(1);
+        phi = slow::acos(cp);
+        }
+    else
+        {
+        theta = Scalar(0);
+        phi = Scalar(0);
+        }
+    }
+
+//! Build a quaternion from an axis and an angle.
+AZPLUGINS_HOSTDEVICE inline quat<Scalar> quatFromAxisAngle(const vec3<Scalar>& axis, Scalar angle)
+    {
+    Scalar s, c;
+    fast::sincos(Scalar(0.5) * angle, s, c);
+    return quat<Scalar>(c, s * axis);
+    }
+
+//! Build a quaternion from intrinsic ZXZ Euler angles.
+AZPLUGINS_HOSTDEVICE inline quat<Scalar> quatFromEulerZXZ(Scalar alpha, Scalar beta, Scalar gamma)
+    {
+    const quat<Scalar> qz_a = quatFromAxisAngle(vec3<Scalar>(0, 0, 1), alpha);
+    const quat<Scalar> qx_b = quatFromAxisAngle(vec3<Scalar>(1, 0, 0), beta);
+    const quat<Scalar> qz_g = quatFromAxisAngle(vec3<Scalar>(0, 0, 1), gamma);
+    return qz_a * qx_b * qz_g;
+    }
+
+//! Extract intrinsic ZXZ Euler angles from a quaternion.
+AZPLUGINS_HOSTDEVICE inline void
+eulerFromQuat(const quat<Scalar>& q, Scalar& alpha, Scalar& beta, Scalar& gamma)
+    {
+    const rotmat3<Scalar> R(q);
+    const Scalar tol = Scalar(1e-7);
+
+    if (R.row2.z < Scalar(-1))
+        {
+        beta = Scalar(M_PI);
+        }
+    else if (R.row2.z > Scalar(1))
+        {
+        beta = Scalar(0);
+        }
+    else
+        {
+        beta = std::acos(R.row2.z);
+        }
+
+    if (beta > tol && beta < Scalar(M_PI) - tol)
+        {
+        alpha = std::atan2(R.row0.z, -R.row1.z);
+        gamma = std::atan2(R.row2.x, R.row2.y);
+        }
+    else if (beta <= tol)
+        {
+        alpha = Scalar(0);
+        gamma = std::atan2(R.row1.x, R.row0.x);
+        }
+    else
+        {
+        alpha = Scalar(0);
+        gamma = std::atan2(-R.row1.x, R.row0.x);
+        }
+
+    if (alpha < Scalar(0))
+        alpha += Scalar(2) * Scalar(M_PI);
+    if (gamma < Scalar(0))
+        gamma += Scalar(2) * Scalar(M_PI);
+    }
+
+    } // namespace detail
+
+//! Null symmetry: no reduction.
+/*! Full natural domain:
+    theta in [0, 2 pi], phi in [0, pi], alpha in [0, 2 pi],
+    beta in [0, pi], gamma in [0, 2 pi].
+*/
+class ShapeSymmetryNull
+    {
+    public:
+    //! Upper bounds of the reduced domain (lower bounds are always zero).
+    static constexpr Scalar domain_upper[5]
+        = {Scalar(2.0 * M_PI), Scalar(M_PI), Scalar(2.0 * M_PI), Scalar(M_PI), Scalar(2.0 * M_PI)};
+
+    AZPLUGINS_HOSTDEVICE ShapeSymmetryNull() { }
+
+#ifndef __HIPCC__
+    static std::string getName()
+        {
+        return "Null";
+        }
+#endif
+
+    AZPLUGINS_HOSTDEVICE quat<Scalar> reduce(Scalar& /*theta*/,
+                                             Scalar& /*phi*/,
+                                             Scalar& /*alpha*/,
+                                             Scalar& /*beta*/,
+                                             Scalar& /*gamma*/) const
+        {
+        return quat<Scalar>(Scalar(1), vec3<Scalar>(0, 0, 0));
+        }
+    };
+
+//! Tetrahedron symmetry evaluator.
+/*! Reduced domain:
+    theta in [0, 2 pi/3], phi in [0, pi], alpha in [0, 2 pi],
+    beta in [0, pi], gamma in [0, 2 pi/3].
+*/
+class ShapeSymmetryTetrahedron
+    {
+    public:
+    //! Upper bounds of the reduced domain.
+    static constexpr Scalar domain_upper[5] = {Scalar(2.0 * M_PI / 3.0),
+                                               Scalar(M_PI),
+                                               Scalar(2.0 * M_PI),
+                                               Scalar(M_PI),
+                                               Scalar(2.0 * M_PI / 3.0)};
+
+    AZPLUGINS_HOSTDEVICE ShapeSymmetryTetrahedron() { }
+
+#ifndef __HIPCC__
+    static std::string getName()
+        {
+        return "Tetrahedron";
+        }
+#endif
+
+    AZPLUGINS_HOSTDEVICE quat<Scalar>
+    reduce(Scalar& theta, Scalar& phi, Scalar& alpha, Scalar& beta, Scalar& gamma) const
+        {
+        quat<Scalar> transformation(Scalar(1), vec3<Scalar>(0, 0, 0));
+
+        const Scalar theta_fold = Scalar(2) * Scalar(M_PI) / Scalar(3);
+
+        // fold theta into [0, 2 pi/3] by rotating around z.
+        if (theta > theta_fold)
+            {
+            vec3<Scalar> pos = detail::sphericalToCartesian(Scalar(1), theta, phi);
+
+            const Scalar n = slow::floor(theta / theta_fold);
+            const Scalar angle = -n * theta_fold;
+            const quat<Scalar> rot_z = detail::quatFromAxisAngle(vec3<Scalar>(0, 0, 1), angle);
+            pos = rotate(rot_z, pos);
+            transformation = rot_z * transformation;
+
+            Scalar r_tmp;
+            detail::cartesianToSpherical(pos, r_tmp, theta, phi);
+            }
+
+        // apply transformation to orientation, extract Euler angles.
+        quat<Scalar> q_orient = detail::quatFromEulerZXZ(alpha, beta, gamma);
+        quat<Scalar> q_transformed = transformation * q_orient;
+        detail::eulerFromQuat(q_transformed, alpha, beta, gamma);
+
+        // 3-fold gamma symmetry.
+        gamma = std::fmod(gamma, theta_fold);
+
+        return transformation;
+        }
+    };
+
+    } // namespace azplugins
+    } // namespace hoomd
+
+#undef AZPLUGINS_HOSTDEVICE
+#undef AZPLUGINS_FORCEINLINE
+
+#endif // AZPLUGINS_SHAPE_SYMMETRY_H_
diff --git a/src/pytest/test_chebyshev.py b/src/pytest/test_chebyshev.py
index e95f7b0..c65a422 100644
--- a/src/pytest/test_chebyshev.py
+++ b/src/pytest/test_chebyshev.py
@@ -48,7 +48,6 @@ def test_chebyshev_construct_attach_zero(
 
     coeffs = numpy.asarray([0.0, 0.0], dtype=numpy.float64)
 
-    # r0 must be 5D (and each dimension >= 2)
     r0 = (numpy.arange(32, dtype=numpy.float64).reshape((2, 2, 2, 2, 2))) * 0.01
 
     r_cut = 3.0
@@ -65,14 +64,10 @@ def test_chebyshev_construct_attach_zero(
     integrator.forces = [pot]
     sim.operations.integrator = integrator
 
-    # attach
     sim.run(0)
 
-    # check if attach happened
     assert hasattr(pot, "_cpp_obj")
     assert pot._cpp_obj is not None
-
-    # recheck key properties after attach
     assert numpy.isclose(pot.r_cut, r_cut)
     assert pot.r0.shape == (2, 2, 2, 2, 2)
 
@@ -85,7 +80,7 @@ def test_chebyshev_construct_attach_zero(
 def test_chebyshev_force_torque_energy_no_symmetry(
     simulation_factory, two_particle_snapshot_factory
 ):
-    """Test energy, force, and torque, without considering symmetry."""
+    """ "Test energy, force, and torque, without considering symmetry."""
     rc = 3.0
     phi_min = 1e-5
     beta_min = 1e-5
@@ -112,11 +107,11 @@ def test_chebyshev_force_torque_energy_no_symmetry(
     )
     coeffs = numpy.array([1.0, 0.25, 1.5, -1.0], dtype=numpy.float64)
 
-    # r0 data: shape (3, 2, 3, 2, 3) = 108 values.
     r0_data = numpy.array([1, 2.1, 3.2] * 36, dtype=numpy.float64).reshape(
         3, 2, 3, 2, 3
     )
 
+    # r0 interpolator
     theta_grid = numpy.linspace(0, 2 * numpy.pi, 3)
     phi_grid = numpy.linspace(phi_min, numpy.pi - phi_min, 2)
     alpha_grid = numpy.linspace(0, 2 * numpy.pi, 3)
@@ -132,15 +127,12 @@ def test_chebyshev_force_torque_energy_no_symmetry(
     )
 
     def rho_to_r(rho, r0, rc):
-        """Invert  rho = (1/r - 1/r0) / (1/(r0+rc) - 1/r0)  to obtain r."""
         inv_r0 = 1.0 / r0
         inv_r0_rc = 1.0 / (r0 + rc)
         inv_r = rho * (inv_r0_rc - inv_r0) + inv_r0
         return 1.0 / inv_r
 
     def run_pair(rho, theta, phi, alpha, beta, gamma):
-        """Build a two-particle simulation, run for one step, and return
-        the potential object."""
         snap = two_particle_snapshot_factory()
         if snap.communicator.rank == 0:
             r0 = float(r0_interp(numpy.array([theta, phi, alpha, beta, gamma]))[0])
@@ -181,11 +173,12 @@ def run_pair(rho, theta, phi, alpha, beta, gamma):
         return sim, pot
 
     def check(sim, pot, expected_energy, expected_force, expected_torque):
-        """Compare the output on particle 0 to the Python reference (smolyay)."""
+        """Compare both particles.  Particle 1 should obey Newton's third law."""
         if sim.device.communicator.rank == 0:
-            numpy.testing.assert_allclose(
-                pot.energies[0], expected_energy, atol=1e-3, rtol=1e-3
-            )
+            half_e = 0.5 * expected_energy
+
+            # particle 0
+            numpy.testing.assert_allclose(pot.energies[0], half_e, atol=1e-3, rtol=1e-3)
             numpy.testing.assert_allclose(
                 pot.forces[0], expected_force, atol=1e-3, rtol=1e-3
             )
@@ -193,6 +186,15 @@ def check(sim, pot, expected_energy, expected_force, expected_torque):
                 pot.torques[0], expected_torque, atol=1e-3, rtol=1e-3
             )
 
+            # particle 1 - Newton's third law
+            numpy.testing.assert_allclose(pot.energies[1], half_e, atol=1e-3, rtol=1e-3)
+            numpy.testing.assert_allclose(
+                pot.forces[1], -expected_force, atol=1e-3, rtol=1e-3
+            )
+            numpy.testing.assert_allclose(
+                pot.torques[1], -expected_torque, atol=1e-3, rtol=1e-3
+            )
+
     # point 1: interior
     sim, pot = run_pair(
         rho=0.2,
@@ -210,7 +212,7 @@ def check(sim, pot, expected_energy, expected_force, expected_torque):
         expected_torque=numpy.array([0.944, -0.307, -0.271]),
     )
 
-    # point 2: rho < 0
+    # point 2: rho < 0 (clamped for derivatives, extrapolated for energy)
     sim, pot = run_pair(
         rho=-0.1,
         theta=numpy.pi / 4,
@@ -222,14 +224,14 @@ def check(sim, pot, expected_energy, expected_force, expected_torque):
     check(
         sim,
         pot,
-        expected_energy=-1.25,
+        expected_energy=-1.67,
         expected_force=numpy.array([-1.906, -1.906, -2.695]),
         expected_torque=numpy.array([1.226, -0.398, -0.398]),
     )
 
-    # point 3: rho < 0 and phi at upper boundary
+    # point 3: phi at upper boundary
     sim, pot = run_pair(
-        rho=-0.1,
+        rho=0.0,
         theta=numpy.pi / 4,
         phi=numpy.pi - phi_min,
         alpha=2 * numpy.pi / 15,
@@ -240,8 +242,8 @@ def check(sim, pot, expected_energy, expected_force, expected_torque):
         sim,
         pot,
         expected_energy=-1.583,
-        expected_force=numpy.array([0.0, 0.0, 4.296]),
-        expected_torque=numpy.array([0.591, -1.327, -0.398]),
+        expected_force=numpy.array([0.0, 0.0, 3.832]),
+        expected_torque=numpy.array([0.546, -1.226, -0.398]),
     )
 
     # point 4: beta at lower boundary
@@ -278,7 +280,7 @@ def check(sim, pot, expected_energy, expected_force, expected_torque):
         expected_torque=numpy.array([0.207, -0.067, 0.207]),
     )
 
-    # point 6: rho > 1, pair is beyond the surface cutoff
+    # point 6: rho > 1, beyond surface cutoff - all zeros
     sim, pot = run_pair(
         rho=1.05,
         theta=numpy.pi / 4,
@@ -291,3 +293,6 @@ def check(sim, pot, expected_energy, expected_force, expected_torque):
         numpy.testing.assert_allclose(pot.energies[0], 0.0, atol=1e-10)
         numpy.testing.assert_allclose(pot.forces[0], [0.0, 0.0, 0.0], atol=1e-10)
         numpy.testing.assert_allclose(pot.torques[0], [0.0, 0.0, 0.0], atol=1e-10)
+        numpy.testing.assert_allclose(pot.energies[1], 0.0, atol=1e-10)
+        numpy.testing.assert_allclose(pot.forces[1], [0.0, 0.0, 0.0], atol=1e-10)
+        numpy.testing.assert_allclose(pot.torques[1], [0.0, 0.0, 0.0], atol=1e-10)

From 0d92038afb6d7a5ee74a9437f16d2c6c49b43d04 Mon Sep 17 00:00:00 2001
From: Mohammadreza <mzf0069@auburn.edu>
Date: Thu, 16 Apr 2026 19:25:47 -0500
Subject: [PATCH 09/13] Update documentation

---
 src/ChebyshevAnisotropicPairPotential.cc | 15 +++++++++++
 src/LinearInterpolator5D.h               | 33 ++++++++++++++++--------
 src/pytest/test_chebyshev.py             | 14 ++++++----
 3 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/src/ChebyshevAnisotropicPairPotential.cc b/src/ChebyshevAnisotropicPairPotential.cc
index 47ed72e..adef23f 100644
--- a/src/ChebyshevAnisotropicPairPotential.cc
+++ b/src/ChebyshevAnisotropicPairPotential.cc
@@ -27,6 +27,12 @@ static inline Scalar scaleToChebDomain(Scalar x, Scalar lo, Scalar hi)
     T_0(x) = 1                       T'_0(x) = 0
     T_1(x) = x                       T'_1(x) = 1
     T_{n+1}(x) = 2x T_n - T_{n-1}   T'_{n+1}(x) = 2 T_n + 2x T'_n - T'_{n-1}
+
+
+    \param x        Evaluation point in [-1, 1]
+    \param max_deg  Highest polynomial degree to compute
+    \param T        Output: T[n] = T_n(x)  for n = 0 .. max_deg  (size >= max_deg+1)
+    \param dT       Output: dT[n] = T'_n(x) for n = 0 .. max_deg (size >= max_deg+1)
 */
 static inline void evaluateChebyshev(Scalar x, unsigned int max_deg, Scalar* T, Scalar* dT)
     {
@@ -342,9 +348,13 @@ void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
             // move phi and beta away from 0 and pi to avoid 1/sin(beta or phi)
             // singularity in the Jacobian (used the same threshold as beta).
             if (phi < beta_tol)
+                {
                 phi = beta_tol;
+                }
             else if (phi > Scalar(M_PI) - beta_tol)
+                {
                 phi = Scalar(M_PI) - beta_tol;
+                }
 
             if (beta < beta_tol)
                 beta = beta_tol;
@@ -552,11 +562,13 @@ void export_ChebyshevAnisotropicPairPotential(pybind11::module& m)
                py::array_t<Scalar, py::array::c_style | py::array::forcecast> coeffs,
                py::array_t<Scalar, py::array::c_style | py::array::forcecast> r0_data)
             {
+                // domain must be (5,2) - rho is always in (0, 1)
                 if (domain.ndim() != 2 || domain.shape(0) != 5 || domain.shape(1) != 2)
                     {
                     throw std::runtime_error("domain must have shape (5,2).");
                     }
 
+                // terms must be (Nterms,6)
                 if (terms.ndim() != 2 || terms.shape(1) != 6)
                     {
                     throw std::runtime_error("terms must have shape (Nterms,6).");
@@ -564,16 +576,19 @@ void export_ChebyshevAnisotropicPairPotential(pybind11::module& m)
 
                 const unsigned int Nterms = static_cast<unsigned int>(terms.shape(0));
 
+                // coeffs must be (Nterms,)
                 if (coeffs.ndim() != 1 || static_cast<unsigned int>(coeffs.shape(0)) != Nterms)
                     {
                     throw std::runtime_error("coeffs must have shape (Nterms,).");
                     }
 
+                // r0_data must be 5D
                 if (r0_data.ndim() != 5)
                     {
                     throw std::runtime_error("r0_data must be a 5D array.");
                     }
 
+                // infer r0_shape from r0_data.shape
                 std::array<unsigned int, 5> r0_shape;
                 for (unsigned int k = 0; k < 5; ++k)
                     {
diff --git a/src/LinearInterpolator5D.h b/src/LinearInterpolator5D.h
index b1a9c85..887a801 100644
--- a/src/LinearInterpolator5D.h
+++ b/src/LinearInterpolator5D.h
@@ -68,10 +68,10 @@ class Index5D
     unsigned int m_n[5];
     };
 
-/*! \brief 5D multilinear interpolation on a uniform rectilinear grid.
+/*! \brief 5D multilinear interpolation on a uniform rectilinear grid
 
     This is an extension of three-dimensional linear interpolation
-    from (https://github.com/mphowardlab/flyft/blob/main/src/grid_interpolator.cc).
+    from (https://github.com/mphowardlab/flyft/blob/main/src/grid_interpolator.cc)
 
 */
 template<typename T> class LinearInterpolator5D
@@ -104,7 +104,7 @@ template<typename T> class LinearInterpolator5D
         assert(m_indexer.size() > 0);
         }
 
-    //! Constructor accepting the domain as a Scalar2 array.
+    //! Constructor accepting the domain as a Scalar2 array
     AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE LinearInterpolator5D(const T* data,
                                                                     const unsigned int* n,
                                                                     const Scalar2* domain_s2)
@@ -123,13 +123,13 @@ template<typename T> class LinearInterpolator5D
         assert(m_indexer.size() > 0);
         }
 
-    //! Interpolate at (x0, x1, x2, x3, x4).
+    //! Interpolate at (x0, x1, x2, x3, x4)
     AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE Scalar
     operator()(Scalar x0, Scalar x1, Scalar x2, Scalar x3, Scalar x4) const
         {
         const Scalar x[5] = {x0, x1, x2, x3, x4};
 
-        // Compute the cell bin and fractional coordinate in each dimension.
+        // Compute the cell bin and fractional coordinate in each dimension
         int bin[5];
         Scalar frac[5];
 
@@ -141,7 +141,7 @@ template<typename T> class LinearInterpolator5D
             int b = static_cast<int>(std::floor(f));
 
             // If exactly at the top boundary, shift into the last valid cell so
-            // that (b+1) remains in bounds.
+            // that (b+1) remains in bounds
             if (f == Scalar(nd - 1) && x[d] == m_hi[d])
                 {
                 --b;
@@ -154,7 +154,7 @@ template<typename T> class LinearInterpolator5D
             frac[d] = f - Scalar(b);
             }
 
-        // Load the 2^5=32 corners of the surrounding 5D cell.
+        // Load the 2^5=32 corners of the surrounding 5D cell
         Scalar corners[32];
 
         for (unsigned int mask = 0; mask < 32; ++mask)
@@ -170,11 +170,11 @@ template<typename T> class LinearInterpolator5D
             const unsigned int i4
                 = static_cast<unsigned int>(bin[4] + static_cast<int>((mask >> 4) & 1u));
 
-            // Implicit conversion from T to Scalar is intended.
+            // Implicit conversion from T to Scalar is intended
             corners[mask] = m_data[m_indexer(i0, i1, i2, i3, i4)];
             }
 
-        // For each dimension d, collapse pairs of points that differ in bit d.
+        // For each dimension d, collapse pairs of points that differ in bit d
         Scalar scratch[16];
         Scalar* in = corners;
         Scalar* out = scratch;
@@ -195,11 +195,22 @@ template<typename T> class LinearInterpolator5D
             len = out_len;
             }
 
-        // After 5 reductions, len==1 and in[0] holds the interpolated value.
+        // After 5 reductions, len==1 and in[0] holds the interpolated value
         return in[0];
         }
 
-    //! Compute the finite-difference derivative with respect to a single dimension.
+    //! Compute the finite-difference derivative with respect to a single dimension
+    /*! Uses central differences when possible, falling back to forward or backward
+        differences at the domain boundaries.
+
+        \param x0  Coordinate along dimension 0
+        \param x1  Coordinate along dimension 1
+        \param x2  Coordinate along dimension 2
+        \param x3  Coordinate along dimension 3
+        \param x4  Coordinate along dimension 4
+        \param dim Which dimension (0-4) to differentiate with respect to
+        \param h   Finite-difference step size
+    */
     AZPLUGINS_HOSTDEVICE AZPLUGINS_FORCEINLINE Scalar
     derivative(Scalar x0, Scalar x1, Scalar x2, Scalar x3, Scalar x4, int dim, Scalar h) const
         {
diff --git a/src/pytest/test_chebyshev.py b/src/pytest/test_chebyshev.py
index c65a422..8177547 100644
--- a/src/pytest/test_chebyshev.py
+++ b/src/pytest/test_chebyshev.py
@@ -47,7 +47,7 @@ def test_chebyshev_construct_attach_zero(
     )
 
     coeffs = numpy.asarray([0.0, 0.0], dtype=numpy.float64)
-
+    # r0 must be 5D (and each dimension >= 2)
     r0 = (numpy.arange(32, dtype=numpy.float64).reshape((2, 2, 2, 2, 2))) * 0.01
 
     r_cut = 3.0
@@ -63,11 +63,12 @@ def test_chebyshev_construct_attach_zero(
 
     integrator.forces = [pot]
     sim.operations.integrator = integrator
-
+    # attach
     sim.run(0)
-
+    # check if attach happened
     assert hasattr(pot, "_cpp_obj")
     assert pot._cpp_obj is not None
+    # recheck key properties after attach
     assert numpy.isclose(pot.r_cut, r_cut)
     assert pot.r0.shape == (2, 2, 2, 2, 2)
 
@@ -80,7 +81,7 @@ def test_chebyshev_construct_attach_zero(
 def test_chebyshev_force_torque_energy_no_symmetry(
     simulation_factory, two_particle_snapshot_factory
 ):
-    """ "Test energy, force, and torque, without considering symmetry."""
+    """Test energy, force, and torque, without considering symmetry."""
     rc = 3.0
     phi_min = 1e-5
     beta_min = 1e-5
@@ -106,7 +107,7 @@ def test_chebyshev_force_torque_energy_no_symmetry(
         dtype=numpy.uint32,
     )
     coeffs = numpy.array([1.0, 0.25, 1.5, -1.0], dtype=numpy.float64)
-
+    # r0 data: shape (3, 2, 3, 2, 3) = 108 values.
     r0_data = numpy.array([1, 2.1, 3.2] * 36, dtype=numpy.float64).reshape(
         3, 2, 3, 2, 3
     )
@@ -127,12 +128,15 @@ def test_chebyshev_force_torque_energy_no_symmetry(
     )
 
     def rho_to_r(rho, r0, rc):
+        """Invert  rho = (1/r - 1/r0) / (1/(r0+rc) - 1/r0)  to obtain r."""
         inv_r0 = 1.0 / r0
         inv_r0_rc = 1.0 / (r0 + rc)
         inv_r = rho * (inv_r0_rc - inv_r0) + inv_r0
         return 1.0 / inv_r
 
     def run_pair(rho, theta, phi, alpha, beta, gamma):
+        """Build a two-particle simulation, run for one step, and return
+        the simulation and the potential object."""
         snap = two_particle_snapshot_factory()
         if snap.communicator.rank == 0:
             r0 = float(r0_interp(numpy.array([theta, phi, alpha, beta, gamma]))[0])

From 926b57fe3da8bb5b110049fa023fab0b2f60da1c Mon Sep 17 00:00:00 2001
From: Mohammadreza <mzf0069@auburn.edu>
Date: Sun, 19 Apr 2026 10:25:24 -0500
Subject: [PATCH 10/13] Add the symmetry class of cube

---
 src/ShapeSymmetry.h | 122 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 122 insertions(+)

diff --git a/src/ShapeSymmetry.h b/src/ShapeSymmetry.h
index 81277e2..4623e02 100644
--- a/src/ShapeSymmetry.h
+++ b/src/ShapeSymmetry.h
@@ -159,6 +159,128 @@ class ShapeSymmetryNull
         }
     };
 
+//! Cube symmetry evaluator.
+/*! Reduced domain:
+    theta in [0, pi/4], phi in [0, pi/2], alpha in [0, 2 pi],
+    beta in [0, arccos(1/sqrt(3))], gamma in [0, pi/2].
+*/
+class ShapeSymmetryCube
+    {
+    public:
+    //! Upper bounds of the reduced domain, ordered (theta, phi, alpha, beta, gamma).
+    static constexpr Scalar domain_upper[5] = {Scalar(M_PI / 4.0),
+                                               Scalar(M_PI / 2.0),
+                                               Scalar(2.0 * M_PI),
+                                               Scalar(0.9553166181245093), // arccos(1/sqrt(3))
+                                               Scalar(M_PI / 2.0)};
+
+    AZPLUGINS_HOSTDEVICE ShapeSymmetryCube()
+        : m_rot_x_pi(detail::quatFromAxisAngle(vec3<Scalar>(1, 0, 0), Scalar(M_PI))),
+          m_rot_111(quat<Scalar>(Scalar(0.5), vec3<Scalar>(Scalar(0.5), Scalar(0.5), Scalar(0.5))))
+        {
+        }
+
+#ifndef __HIPCC__
+    static std::string getName()
+        {
+        return "Cube";
+        }
+#endif
+    //! Reduce the angles in place; returns the rotation that was applied to the position.
+    AZPLUGINS_HOSTDEVICE quat<Scalar>
+    reduce(Scalar& theta, Scalar& phi, Scalar& alpha, Scalar& beta, Scalar& gamma) const
+        {
+        quat<Scalar> transformation(Scalar(1), vec3<Scalar>(0, 0, 0));
+
+        vec3<Scalar> pos = detail::sphericalToCartesian(Scalar(1), theta, phi);
+
+        // if phi > pi/2, rotate by pi around x to flip z (this also negates y).
+        if (phi > Scalar(M_PI) / Scalar(2))
+            {
+            pos = rotate(m_rot_x_pi, pos);
+            transformation = m_rot_x_pi * transformation;
+            }
+
+        // fold theta into [0, pi/2] by rotating around z in multiples of -pi/2.
+        Scalar r_tmp, th_tmp, ph_tmp;
+        detail::cartesianToSpherical(pos, r_tmp, th_tmp, ph_tmp);
+
+        const Scalar theta_fold = Scalar(M_PI) / Scalar(2);
+        if (th_tmp > theta_fold)
+            {
+            const Scalar angle = -slow::floor(th_tmp / theta_fold) * theta_fold;
+            const quat<Scalar> rot_z = detail::quatFromAxisAngle(vec3<Scalar>(0, 0, 1), angle);
+            pos = rotate(rot_z, pos);
+            transformation = rot_z * transformation;
+            }
+
+        // fold theta into [0, pi/4] using 120-degree rotations
+        // around the [111] body diagonal (up to 3 attempts).
+        const Scalar theta_max = Scalar(M_PI) / Scalar(4);
+        unsigned int n_rot = 0;
+        detail::cartesianToSpherical(pos, r_tmp, th_tmp, ph_tmp);
+        while (n_rot < 3 && th_tmp > theta_max)
+            {
+            pos = rotate(m_rot_111, pos);
+            transformation = m_rot_111 * transformation;
+            detail::cartesianToSpherical(pos, r_tmp, th_tmp, ph_tmp);
+            ++n_rot;
+            }
+
+        // Write back reduced position angles.
+        detail::cartesianToSpherical(pos, r_tmp, theta, phi);
+
+        // apply cumulative transformation to orientation and
+        // select the best candidate from 3 rotations around [111].
+        quat<Scalar> q_orient = detail::quatFromEulerZXZ(alpha, beta, gamma);
+        quat<Scalar> q_cand = transformation * q_orient;
+
+        Scalar best_a = Scalar(0);
+        Scalar best_b = Scalar(1e30);
+        Scalar best_g = Scalar(1e30);
+
+        for (unsigned int i = 0; i < 3; ++i)
+            {
+            Scalar a, b, g;
+            detail::eulerFromQuat(q_cand, a, b, g);
+
+            // if beta > pi/2, reflect: beta -> pi - beta, with alpha and gamma adjusted.
+            if (b > Scalar(M_PI) / Scalar(2))
+                {
+                a = std::fmod(a + Scalar(M_PI), Scalar(2) * Scalar(M_PI));
+                b = Scalar(M_PI) - b;
+                g = Scalar(2) * Scalar(M_PI) - g;
+                if (g < Scalar(0))
+                    g += Scalar(2) * Scalar(M_PI);
+                }
+
+            // 4-fold gamma symmetry: reduce gamma modulo pi/2.
+            g = std::fmod(g, Scalar(M_PI) / Scalar(2));
+
+            // Lexicographic pick on (beta, gamma); beta ties are resolved within 1e-10.
+            if (b < best_b - Scalar(1e-10) || (std::fabs(b - best_b) < Scalar(1e-10) && g < best_g))
+                {
+                best_a = a;
+                best_b = b;
+                best_g = g;
+                }
+
+            // Rotate candidate to the next [111] orientation.
+            q_cand = q_cand * m_rot_111;
+            }
+
+        alpha = best_a;
+        beta = best_b;
+        gamma = best_g;
+
+        return transformation;
+        }
+
+    private:
+    quat<Scalar> m_rot_x_pi; //!< Rotation by pi around x
+    quat<Scalar> m_rot_111;  //!< Rotation by 2 pi/3 around [1,1,1]/sqrt(3)
+    };
+
 //! Tetrahedron symmetry evaluator.
 /*! Reduced domain:
     theta in [0, 2 pi/3], phi in [0, pi], alpha in [0, 2 pi],

From c00f843b4bee9e84e79afdbab2b39b8f88e47dad Mon Sep 17 00:00:00 2001
From: Mohammadreza <mzf0069@auburn.edu>
Date: Fri, 24 Apr 2026 09:18:23 -0500
Subject: [PATCH 11/13] Refactor Chebyshev anisotropic pair potential for
 symmetry support

---
 src/CMakeLists.txt                            |  16 +-
 src/ChebyshevAnisotropicPairPotential.cc      | 619 ----------------
 src/ChebyshevAnisotropicPairPotential.h       | 641 ++++++++++++++++-
 src/ShapeSymmetry.h                           |  70 +-
 ...t_ChebyshevAnisotropicPairPotential.cc.inc |  33 +
 src/module.cc                                 |   8 +-
 src/pair.py                                   |  46 +-
 src/pytest/test_chebyshev.py                  | 676 +++++++++++++++---
 8 files changed, 1310 insertions(+), 799 deletions(-)
 delete mode 100644 src/ChebyshevAnisotropicPairPotential.cc
 create mode 100644 src/export_ChebyshevAnisotropicPairPotential.cc.inc

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 304b83f..d266daa 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,7 +3,6 @@ set(COMPONENT_NAME azplugins)
 
 # TODO: List all host C++ source code files in _${COMPONENT_NAME}_sources.
 set(_${COMPONENT_NAME}_sources
-    ChebyshevAnisotropicPairPotential.cc
     ConstantFlow.cc
     export_ImagePotentialBondHarmonic.cc
     module.cc
@@ -66,6 +65,12 @@ set(_aniso_pair_evaluators
     TwoPatchMorse
     )
 
+set(_chebyshev_symmetries
+    Null
+    Cube
+    Tetrahedron
+    )
+
 # process bond potentials
 foreach(_evaluator ${_bond_evaluators})
     configure_file(export_PotentialBond.cc.inc
@@ -161,6 +166,15 @@ foreach(_evaluator ${_aniso_pair_evaluators})
     endif()
 endforeach()
 
+# process Chebyshev anisotropic pair potential: one generated export source per symmetry
+foreach(_symmetry ${_chebyshev_symmetries})
+    configure_file(export_ChebyshevAnisotropicPairPotential.cc.inc
+                   export_ChebyshevAnisotropicPairPotential${_symmetry}.cc
+                   @ONLY)
+    set(_${COMPONENT_NAME}_sources ${_${COMPONENT_NAME}_sources}
+        export_ChebyshevAnisotropicPairPotential${_symmetry}.cc)
+endforeach()
+
 # process velocity field geometries
 set(_binning_geometries
     Cartesian
diff --git a/src/ChebyshevAnisotropicPairPotential.cc b/src/ChebyshevAnisotropicPairPotential.cc
deleted file mode 100644
index adef23f..0000000
--- a/src/ChebyshevAnisotropicPairPotential.cc
+++ /dev/null
@@ -1,619 +0,0 @@
-// Copyright (c) 2018-2020, Michael P. Howard
-// Copyright (c) 2021-2025, Auburn University
-// Part of azplugins, released under the BSD 3-Clause License.
-
-/*!
- * \file ChebyshevAnisotropicPairPotential.cc
- * \brief Definition of ChebyshevAnisotropicPairPotential
- */
-
-#include "ChebyshevAnisotropicPairPotential.h"
-#include "LinearInterpolator5D.h"
-
-namespace hoomd
-    {
-namespace azplugins
-    {
-
-//! Scale a coordinate from [lo, hi] to the Chebyshev domain [-1, 1].
-static inline Scalar scaleToChebDomain(Scalar x, Scalar lo, Scalar hi)
-    {
-    return (Scalar(2) * (x - lo) / (hi - lo)) - Scalar(1);
-    }
-
-//! Evaluate Chebyshev polynomials of the first kind and their derivatives
-//! from degree 0 up to max_deg, using the three-term recurrence relation.
-/*!
-    T_0(x) = 1                       T'_0(x) = 0
-    T_1(x) = x                       T'_1(x) = 1
-    T_{n+1}(x) = 2x T_n - T_{n-1}   T'_{n+1}(x) = 2 T_n + 2x T'_n - T'_{n-1}
-
-
-    \param x        Evaluation point in [-1, 1]
-    \param max_deg  Highest polynomial degree to compute
-    \param T        Output: T[n] = T_n(x)  for n = 0 .. max_deg  (size >= max_deg+1)
-    \param dT       Output: dT[n] = T'_n(x) for n = 0 .. max_deg (size >= max_deg+1)
-*/
-static inline void evaluateChebyshev(Scalar x, unsigned int max_deg, Scalar* T, Scalar* dT)
-    {
-    T[0] = Scalar(1);
-    dT[0] = Scalar(0);
-
-    if (max_deg == 0)
-        return;
-
-    T[1] = x;
-    dT[1] = Scalar(1);
-
-    const Scalar two_x = Scalar(2) * x;
-    for (unsigned int n = 1; n < max_deg; ++n)
-        {
-        T[n + 1] = two_x * T[n] - T[n - 1];
-        dT[n + 1] = Scalar(2) * T[n] + two_x * dT[n] - dT[n - 1];
-        }
-    }
-
-ChebyshevAnisotropicPairPotential::ChebyshevAnisotropicPairPotential(
-    std::shared_ptr<SystemDefinition> sysdef,
-    std::shared_ptr<hoomd::md::NeighborList> nlist,
-    const Scalar* domain,
-    const Scalar r_cut,
-    const unsigned int* terms,
-    const Scalar* coeffs,
-    unsigned int Nterms,
-    const Scalar* r0_data,
-    const unsigned int* r0_shape)
-    : ForceCompute(sysdef), m_nlist(nlist), m_r_cut(r_cut), m_Nterms(Nterms)
-    {
-        {
-        GPUArray<Scalar2> domain_arr(5, m_exec_conf);
-        m_domain.swap(domain_arr);
-
-        const Index2D domain_index(2, 5);
-        ArrayHandle<Scalar2> h_domain(m_domain, access_location::host, access_mode::readwrite);
-        for (unsigned int d = 0; d < 5; ++d)
-            {
-            h_domain.data[d] = make_scalar2(domain[domain_index(0, d)], domain[domain_index(1, d)]);
-            }
-        }
-
-        // terms: shape (Nterms, 6), stored flat
-        {
-        GPUArray<unsigned int> terms_arr(static_cast<size_t>(Nterms) * 6, m_exec_conf);
-        m_terms.swap(terms_arr);
-
-        ArrayHandle<unsigned int> h_terms(m_terms, access_location::host, access_mode::readwrite);
-        std::copy(terms, terms + static_cast<size_t>(Nterms) * 6, h_terms.data);
-        }
-
-        // coeffs: shape (Nterms,)
-        {
-        GPUArray<Scalar> coeffs_arr(Nterms, m_exec_conf);
-        m_coeffs.swap(coeffs_arr);
-
-        ArrayHandle<Scalar> h_coeffs(m_coeffs, access_location::host, access_mode::readwrite);
-        std::copy(coeffs, coeffs + Nterms, h_coeffs.data);
-        }
-
-        // r0_shape: length 5
-        {
-        GPUArray<unsigned int> shape_arr(5, m_exec_conf);
-        m_r0_shape.swap(shape_arr);
-
-        ArrayHandle<unsigned int> h_shape(m_r0_shape,
-                                          access_location::host,
-                                          access_mode::readwrite);
-        std::copy(r0_shape, r0_shape + 5, h_shape.data);
-        }
-
-    // r0_data: flat array, length = product(r0_shape)
-    unsigned int n_r0 = 1;
-    for (unsigned int d = 0; d < 5; ++d)
-        {
-        n_r0 *= r0_shape[d];
-        }
-
-        {
-        GPUArray<Scalar> r0_arr(n_r0, m_exec_conf);
-        m_r0_data.swap(r0_arr);
-
-        ArrayHandle<Scalar> h_r0(m_r0_data, access_location::host, access_mode::readwrite);
-        std::copy(r0_data, r0_data + n_r0, h_r0.data);
-        }
-
-    // neighbor list subscriber
-    Scalar max_r0 = *std::max_element(r0_data, r0_data + n_r0);
-    m_nlist_r_cut = std::ceil(max_r0 + m_r_cut);
-
-    m_r_cut_nlist = std::make_shared<GPUArray<Scalar>>(1, m_exec_conf);
-        {
-        ArrayHandle<Scalar> h_r_cut_nlist(*m_r_cut_nlist,
-                                          access_location::host,
-                                          access_mode::overwrite);
-        h_r_cut_nlist.data[0] = m_nlist_r_cut;
-        }
-    m_nlist->addRCutMatrix(m_r_cut_nlist);
-    m_nlist->notifyRCutMatrixChange();
-    }
-
-ChebyshevAnisotropicPairPotential::~ChebyshevAnisotropicPairPotential()
-    {
-    if (m_attached)
-        {
-        m_nlist->removeRCutMatrix(m_r_cut_nlist);
-        }
-    }
-
-void ChebyshevAnisotropicPairPotential::notifyDetach()
-    {
-    if (m_attached)
-        {
-        m_nlist->removeRCutMatrix(m_r_cut_nlist);
-        }
-    m_attached = false;
-    }
-
-void ChebyshevAnisotropicPairPotential::computeForces(uint64_t timestep)
-    {
-    // start by updating the neighborlist
-    m_nlist->compute(timestep);
-
-    // check neighbor list storage mode
-    const bool third_law = (m_nlist->getStorageMode() == hoomd::md::NeighborList::half);
-    // access neighbor list, particle data, and simulation box
-    ArrayHandle<unsigned int> h_n_neigh(m_nlist->getNNeighArray(),
-                                        access_location::host,
-                                        access_mode::read);
-    ArrayHandle<unsigned int> h_nlist(m_nlist->getNListArray(),
-                                      access_location::host,
-                                      access_mode::read);
-    ArrayHandle<size_t> h_head_list(m_nlist->getHeadList(),
-                                    access_location::host,
-                                    access_mode::read);
-    ArrayHandle<Scalar4> h_pos(m_pdata->getPositions(), access_location::host, access_mode::read);
-    ArrayHandle<Scalar4> h_orientation(m_pdata->getOrientationArray(),
-                                       access_location::host,
-                                       access_mode::read);
-    ArrayHandle<Scalar2> h_domain(m_domain, access_location::host, access_mode::read);
-    ArrayHandle<Scalar> h_r0_data(m_r0_data, access_location::host, access_mode::read);
-    ArrayHandle<unsigned int> h_r0_shape(m_r0_shape, access_location::host, access_mode::read);
-    ArrayHandle<unsigned int> h_terms(m_terms, access_location::host, access_mode::read);
-    ArrayHandle<Scalar> h_coeffs(m_coeffs, access_location::host, access_mode::read);
-
-    const BoxDim box = m_pdata->getGlobalBox();
-    const Scalar nlist_rcutsq = m_nlist_r_cut * m_nlist_r_cut;
-    const Scalar fd_step = Scalar(1.0e-6);
-
-    LinearInterpolator5D<Scalar> interp(h_r0_data.data, h_r0_shape.data, h_domain.data);
-
-    // determine the maximum Chebyshev degree needed for each of the 6 coordinates
-    unsigned int max_deg[6] = {0, 0, 0, 0, 0, 0};
-    for (unsigned int t = 0; t < m_Nterms; ++t)
-        {
-        for (unsigned int c = 0; c < 6; ++c)
-            {
-            const unsigned int deg = h_terms.data[t * 6 + c];
-            if (deg > max_deg[c])
-                max_deg[c] = deg;
-            }
-        }
-
-    // chain-rule scale factors: d(x_scaled)/d(x) = 2 / (hi - lo)
-    Scalar cheb_scale[6];
-    cheb_scale[0] = Scalar(2);
-    for (unsigned int d = 0; d < 5; ++d)
-        {
-        cheb_scale[d + 1] = Scalar(2) / (h_domain.data[d].y - h_domain.data[d].x);
-        }
-
-    // flat 1D Chebyshev storage
-    unsigned int max_deg_global = 0;
-    for (unsigned int c = 0; c < 6; ++c)
-        {
-        if (max_deg[c] > max_deg_global)
-            max_deg_global = max_deg[c];
-        }
-
-    const Index2D cheb_idx(max_deg_global + 1, 6);
-    std::vector<Scalar> cheb_T_flat(cheb_idx.getNumElements());
-    std::vector<Scalar> cheb_dT_flat(cheb_idx.getNumElements());
-
-    // zero force and torque
-    m_force.zeroFill();
-    m_torque.zeroFill();
-
-    ArrayHandle<Scalar4> h_force(m_force, access_location::host, access_mode::readwrite);
-    ArrayHandle<Scalar4> h_torque(m_torque, access_location::host, access_mode::readwrite);
-
-    const unsigned int N = m_pdata->getN();
-    //! Euler-angle singularity tolerance for the alpha/gamma extraction.
-    const Scalar euler_singularity_tol = Scalar(1e-7);
-
-    //! beta threshold for the Jacobian (avoids 1/sin(beta) singulrity).
-    const Scalar beta_tol = Scalar(1e-5);
-
-    for (unsigned int i = 0; i < N; ++i)
-        {
-        // particle i position and orientation
-        const Scalar3 pos_i = make_scalar3(h_pos.data[i].x, h_pos.data[i].y, h_pos.data[i].z);
-        const quat<Scalar> q_i(h_orientation.data[i]);
-        const quat<Scalar> q_i_conj = conj(q_i);
-
-        // initialize particle force, torque, and energy
-        Scalar3 fi = make_scalar3(0, 0, 0);
-        Scalar3 ti = make_scalar3(0, 0, 0);
-        Scalar pei = Scalar(0);
-
-        const size_t myHead = h_head_list.data[i];
-        const unsigned int size = (unsigned int)h_n_neigh.data[i];
-
-        for (unsigned int k = 0; k < size; ++k)
-            {
-            // access the index
-            const unsigned int j = h_nlist.data[myHead + k];
-            assert(j < m_pdata->getN() + m_pdata->getNGhosts());
-
-            const Scalar3 pos_j = make_scalar3(h_pos.data[j].x, h_pos.data[j].y, h_pos.data[j].z);
-            Scalar3 dx = pos_i - pos_j;
-            // apply periodic boundary conditions
-            dx = box.minImage(dx);
-            // Neighbor-list cutoff check (center-center distance).
-            const Scalar rsq = dot(dx, dx);
-            if (rsq > nlist_rcutsq)
-                {
-                continue;
-                }
-
-            // particle j, orientation quaternion
-            const quat<Scalar> q_j(h_orientation.data[j]);
-            // dx is in lab frame, so rotate dx by conj(q_i)
-            const vec3<Scalar> dx_body = rotate(q_i_conj, vec3<Scalar>(dx));
-
-            // Relative orientation of j with respect to i:
-            //     q_rel = conj(q_i) * q_j
-            // ref:
-            // https://www.mathworks.com/help/fusion/ug/rotations-orientation-and-quaternions.html
-            const quat<Scalar> q_rel = q_i_conj * q_j;
-
-            // convert position to spherical coordinates
-            const Scalar r = fast::sqrt(dot(dx_body, dx_body));
-            Scalar theta = Scalar(0);
-            Scalar phi = Scalar(0);
-
-            // skip overlapping particles.
-            if (r < Scalar(1e-12))
-                {
-                continue;
-                }
-
-            theta = std::atan2(dx_body.y, dx_body.x);
-            if (theta < Scalar(0))
-                {
-                theta += Scalar(2.0) * M_PI;
-                }
-
-            Scalar cosphi = dx_body.z / r;
-            if (cosphi < Scalar(-1))
-                {
-                cosphi = Scalar(-1);
-                }
-            else if (cosphi > Scalar(1))
-                {
-                cosphi = Scalar(1);
-                }
-            phi = std::acos(cosphi);
-
-            // Build rotation matrix from the relative quaternion and extract
-            // ZXZ Euler angles (alpha, beta, gamma)
-            const rotmat3<Scalar> R(q_rel);
-
-            Scalar alpha = Scalar(0);
-            Scalar beta = Scalar(0);
-            Scalar gamma = Scalar(0);
-
-            if (R.row2.z < Scalar(-1))
-                {
-                beta = Scalar(M_PI);
-                }
-            else if (R.row2.z > Scalar(1))
-                {
-                beta = Scalar(0);
-                }
-            else
-                {
-                beta = std::acos(R.row2.z);
-                }
-
-            if (beta >= euler_singularity_tol && beta <= Scalar(M_PI) - euler_singularity_tol)
-                {
-                alpha = std::atan2(R.row0.z, -R.row1.z);
-                gamma = std::atan2(R.row2.x, R.row2.y);
-                if (alpha < Scalar(0))
-                    {
-                    alpha += Scalar(2) * M_PI;
-                    }
-                }
-            else
-                {
-                alpha = Scalar(0);
-                gamma
-                    = std::atan2((beta <= euler_singularity_tol) ? R.row0.y : -R.row0.y, R.row0.x);
-                }
-
-            if (gamma < Scalar(0))
-                {
-                gamma += Scalar(2) * M_PI;
-                }
-
-            // move phi and beta away from 0 and pi to avoid 1/sin(beta or phi)
-            // singularity in the Jacobian (used the same threshold as beta).
-            if (phi < beta_tol)
-                {
-                phi = beta_tol;
-                }
-            else if (phi > Scalar(M_PI) - beta_tol)
-                {
-                phi = Scalar(M_PI) - beta_tol;
-                }
-
-            if (beta < beta_tol)
-                beta = beta_tol;
-            else if (beta > Scalar(M_PI) - beta_tol)
-                beta = Scalar(M_PI) - beta_tol;
-
-            // compute r0 and all 5 derivatives
-            Scalar r0;
-            Scalar dr0[5];
-            interp.valueAndDerivatives(theta, phi, alpha, beta, gamma, fd_step, r0, dr0);
-            const Scalar dr0_dtheta = dr0[0];
-            const Scalar dr0_dphi = dr0[1];
-            const Scalar dr0_dalpha = dr0[2];
-            const Scalar dr0_dbeta = dr0[3];
-            const Scalar dr0_dgamma = dr0[4];
-
-            // compute rho
-            const Scalar inv_r = Scalar(1) / r;
-            const Scalar inv_r0 = Scalar(1) / r0;
-            const Scalar inv_r0_rcut = Scalar(1) / (r0 + m_r_cut);
-            const Scalar rho_denom = inv_r0_rcut - inv_r0;
-            const Scalar rho_num = inv_r - inv_r0;
-            Scalar rho = rho_num / rho_denom;
-
-            if (rho > Scalar(1))
-                {
-                continue;
-                }
-
-            // save raw rho for energy extrapolation if rho < 0
-            const Scalar rho_energy = rho;
-            if (rho < Scalar(0))
-                {
-                rho = Scalar(0);
-                }
-
-            // drho/dr and drho/dr0
-            const Scalar inv_r_sq = inv_r * inv_r;
-            const Scalar inv_r0_sq = inv_r0 * inv_r0;
-            const Scalar inv_r0_rcut_sq = inv_r0_rcut * inv_r0_rcut;
-            const Scalar rho_denom_sq = rho_denom * rho_denom;
-
-            const Scalar drho_dr = -inv_r_sq / rho_denom;
-            const Scalar drho_dr0
-                = (inv_r0_sq * rho_denom - rho_num * (inv_r0_sq - inv_r0_rcut_sq)) / rho_denom_sq;
-
-            // Chebyshev evaluation: scale each coordinate to [-1,1]
-            // and evaluate polynomials + derivatives up to max degree.
-            evaluateChebyshev(scaleToChebDomain(rho, Scalar(0), Scalar(1)),
-                              max_deg[0],
-                              cheb_T_flat.data() + cheb_idx(0, 0),
-                              cheb_dT_flat.data() + cheb_idx(0, 0));
-
-            const Scalar ang_coords[5] = {theta, phi, alpha, beta, gamma};
-            for (unsigned int c = 0; c < 5; ++c)
-                {
-                evaluateChebyshev(
-                    scaleToChebDomain(ang_coords[c], h_domain.data[c].x, h_domain.data[c].y),
-                    max_deg[c + 1],
-                    cheb_T_flat.data() + cheb_idx(0, c + 1),
-                    cheb_dT_flat.data() + cheb_idx(0, c + 1));
-                }
-
-            // evaluate u and du/d(coord_k)
-            Scalar u = Scalar(0);
-            Scalar du[6] = {Scalar(0), Scalar(0), Scalar(0), Scalar(0), Scalar(0), Scalar(0)};
-
-            for (unsigned int t = 0; t < m_Nterms; ++t)
-                {
-                const unsigned int* degs = h_terms.data + 6 * t;
-                const Scalar coeff = h_coeffs.data[t];
-
-                Scalar T_vals[6];
-                Scalar dT_vals[6];
-                for (unsigned int c = 0; c < 6; ++c)
-                    {
-                    T_vals[c] = cheb_T_flat[cheb_idx(degs[c], c)];
-                    dT_vals[c] = cheb_dT_flat[cheb_idx(degs[c], c)];
-                    }
-
-                Scalar prefix[7];
-                prefix[0] = Scalar(1);
-                for (unsigned int c = 0; c < 6; ++c)
-                    {
-                    prefix[c + 1] = prefix[c] * T_vals[c];
-                    }
-
-                Scalar suffix[7];
-                suffix[6] = Scalar(1);
-                for (int c = 5; c >= 0; --c)
-                    {
-                    suffix[c] = suffix[c + 1] * T_vals[c];
-                    }
-
-                u += coeff * prefix[6];
-
-                for (unsigned int c = 0; c < 6; ++c)
-                    {
-                    du[c] += coeff * dT_vals[c] * cheb_scale[c] * prefix[c] * suffix[c + 1];
-                    }
-                }
-
-            // linear extrapolation for energy when rho < 0
-            u = (rho_energy < Scalar(0)) ? (u + rho_energy * du[0]) : u;
-
-            // Jacobian matrix J (6x6).
-            // J maps the potential-derivative vector
-            // [du/drho, du/dtheta, du/dphi, du/dalpha, du/dbeta, du/dgamma]
-            // to the lab-frame force and torque:
-            // [F_x, F_y, F_z, tau_x, tau_y, tau_z]
-            Scalar s_th, c_th;
-            fast::sincos(theta, s_th, c_th);
-            Scalar s_ph, c_ph;
-            fast::sincos(phi, s_ph, c_ph);
-            Scalar s_b, c_b;
-            fast::sincos(beta, s_b, c_b);
-            Scalar s_a, c_a;
-            fast::sincos(alpha, s_a, c_a);
-
-            const Scalar inv_r_s_ph = inv_r / s_ph;
-            const Scalar inv_s_b = Scalar(1) / s_b;
-
-            // common products involving drho_dr0 and r0 derivatives
-            const Scalar A = drho_dr0 * dr0_dtheta * inv_r_s_ph;
-            const Scalar B = drho_dr0 * dr0_dphi * inv_r;
-            const Scalar C = drho_dr0 * dr0_dalpha * inv_s_b;
-            const Scalar D = drho_dr0 * dr0_dgamma * inv_s_b;
-
-            // force (lab frame)
-            const Scalar f_x = (-c_th * s_ph * drho_dr + s_th * A - c_th * c_ph * B) * du[0]
-                               + (s_th * inv_r_s_ph) * du[1] + (-c_th * c_ph * inv_r) * du[2];
-
-            const Scalar f_y = (-s_th * s_ph * drho_dr - c_th * A - s_th * c_ph * B) * du[0]
-                               + (-c_th * inv_r_s_ph) * du[1] + (-s_th * c_ph * inv_r) * du[2];
-
-            const Scalar f_z = (-c_ph * drho_dr + s_ph * B) * du[0] + (s_ph * inv_r) * du[2];
-
-            // torque (lab frame)
-            const Scalar tau_x = (c_b * s_a * C - c_a * drho_dr0 * dr0_dbeta - s_a * D) * du[0]
-                                 + (c_b * s_a * inv_s_b) * du[3] + (-c_a) * du[4]
-                                 + (-s_a * inv_s_b) * du[5];
-
-            const Scalar tau_y = (-c_b * c_a * C - s_a * drho_dr0 * dr0_dbeta + c_a * D) * du[0]
-                                 + (-c_a * c_b * inv_s_b) * du[3] + (-s_a) * du[4]
-                                 + (c_a * inv_s_b) * du[5];
-
-            const Scalar tau_z = (-drho_dr0 * dr0_dalpha) * du[0] + (-Scalar(1)) * du[3];
-
-            // accumulate
-            fi.x += f_x;
-            fi.y += f_y;
-            fi.z += f_z;
-
-            ti.x += tau_x;
-            ti.y += tau_y;
-            ti.z += tau_z;
-
-            pei += u;
-
-            // Newton's third law for half neighbor list
-            if (third_law)
-                {
-                h_force.data[j].x -= f_x;
-                h_force.data[j].y -= f_y;
-                h_force.data[j].z -= f_z;
-                h_force.data[j].w += Scalar(0.5) * u;
-
-                h_torque.data[j].x -= tau_x;
-                h_torque.data[j].y -= tau_y;
-                h_torque.data[j].z -= tau_z;
-                }
-            }
-
-        h_force.data[i].x += fi.x;
-        h_force.data[i].y += fi.y;
-        h_force.data[i].z += fi.z;
-        h_force.data[i].w += Scalar(0.5) * pei;
-
-        h_torque.data[i].x += ti.x;
-        h_torque.data[i].y += ti.y;
-        h_torque.data[i].z += ti.z;
-        h_torque.data[i].w += Scalar(0.0);
-        }
-    }
-
-namespace detail
-    {
-
-void export_ChebyshevAnisotropicPairPotential(pybind11::module& m)
-    {
-    namespace py = pybind11;
-    using NL = hoomd::md::NeighborList;
-
-    py::class_<ChebyshevAnisotropicPairPotential,
-               std::shared_ptr<ChebyshevAnisotropicPairPotential>>(
-        m,
-        "ChebyshevAnisotropicPairPotential",
-        py::base<hoomd::ForceCompute>())
-        .def(py::init(
-            [](std::shared_ptr<SystemDefinition> sysdef,
-               std::shared_ptr<NL> nlist,
-               py::array_t<Scalar, py::array::c_style | py::array::forcecast> domain,
-               Scalar r_cut,
-               py::array_t<unsigned int, py::array::c_style | py::array::forcecast> terms,
-               py::array_t<Scalar, py::array::c_style | py::array::forcecast> coeffs,
-               py::array_t<Scalar, py::array::c_style | py::array::forcecast> r0_data)
-            {
-                // domain must be (5,2) - rho is always in (0, 1)
-                if (domain.ndim() != 2 || domain.shape(0) != 5 || domain.shape(1) != 2)
-                    {
-                    throw std::runtime_error("domain must have shape (5,2).");
-                    }
-
-                // terms must be (Nterms,6)
-                if (terms.ndim() != 2 || terms.shape(1) != 6)
-                    {
-                    throw std::runtime_error("terms must have shape (Nterms,6).");
-                    }
-
-                const unsigned int Nterms = static_cast<unsigned int>(terms.shape(0));
-
-                // coeffs must be (Nterms,)
-                if (coeffs.ndim() != 1 || static_cast<unsigned int>(coeffs.shape(0)) != Nterms)
-                    {
-                    throw std::runtime_error("coeffs must have shape (Nterms,).");
-                    }
-
-                // r0_data must be 5D
-                if (r0_data.ndim() != 5)
-                    {
-                    throw std::runtime_error("r0_data must be a 5D array.");
-                    }
-
-                // infer r0_shape from r0_data.shape
-                std::array<unsigned int, 5> r0_shape;
-                for (unsigned int k = 0; k < 5; ++k)
-                    {
-                    const auto dim = r0_data.shape(k);
-                    if (dim < 2)
-                        {
-                        throw std::runtime_error("r0_data has invalid dimension(s).");
-                        }
-                    r0_shape[k] = static_cast<unsigned int>(dim);
-                    }
-
-                return std::make_shared<ChebyshevAnisotropicPairPotential>(sysdef,
-                                                                           nlist,
-                                                                           domain.data(),
-                                                                           r_cut,
-                                                                           terms.data(),
-                                                                           coeffs.data(),
-                                                                           Nterms,
-                                                                           r0_data.data(),
-                                                                           r0_shape.data());
-            }))
-        .def_property_readonly("r_cut", &ChebyshevAnisotropicPairPotential::getRCut)
-        .def_property_readonly("num_terms", &ChebyshevAnisotropicPairPotential::getNTerms);
-    }
-
-    } // end namespace detail
-    } // namespace azplugins
-    } // namespace hoomd
diff --git a/src/ChebyshevAnisotropicPairPotential.h b/src/ChebyshevAnisotropicPairPotential.h
index eccc7c0..6a8b6b6 100644
--- a/src/ChebyshevAnisotropicPairPotential.h
+++ b/src/ChebyshevAnisotropicPairPotential.h
@@ -4,7 +4,10 @@
 
 /*!
  * \file ChebyshevAnisotropicPairPotential.h
- * \brief Declaration of ChebyshevAnisotropicPairPotential
+ * \brief Templated class for the Chebyshev anisotropic pair potential.
+ *
+ * The class is templated on a \c ShapeSymmetryT parameter that provides
+ * a symmetry reduction of the angular coordinates.
  */
 
 #ifndef AZPLUGINS_CHEBYSHEV_ANISOTROPIC_PAIR_POTENTIAL_H_
@@ -14,27 +17,78 @@
 #error This header cannot be compiled by nvcc
 #endif
 
+#include <algorithm>
 #include <array>
+#include <cmath>
 #include <memory>
 #include <pybind11/pybind11.h>
 #include <vector>
 
+#include "hoomd/BoxDim.h"
 #include "hoomd/ForceCompute.h"
 #include "hoomd/GPUArray.h"
 #include "hoomd/HOOMDMath.h"
 #include "hoomd/Index1D.h"
-
+#include "hoomd/VectorMath.h"
 #include "hoomd/md/NeighborList.h"
 
+#include "LinearInterpolator5D.h"
+#include "ShapeSymmetry.h"
+
 namespace hoomd
     {
 namespace azplugins
     {
 
+//! Affinely map x so that lo -> -1 and hi -> +1 (the Chebyshev domain); hi == lo is not checked.
+inline Scalar chebScale(Scalar x, Scalar lo, Scalar hi)
+    {
+    return (Scalar(2) * (x - lo) / (hi - lo)) - Scalar(1);
+    }
+
+//! Evaluate Chebyshev polynomials of the first kind and their first derivatives
+//! for every degree from 0 up to max_deg, using the three-term recurrence relation.
+/*!
+    T_0(x) = 1                       T'_0(x) = 0
+    T_1(x) = x                       T'_1(x) = 1
+    T_{n+1}(x) = 2x T_n - T_{n-1}   T'_{n+1}(x) = 2 T_n + 2x T'_n - T'_{n-1}
+
+    Only entries 0 .. max_deg of T and dT are written; anything beyond is left untouched.
+    \param x        Evaluation point (nominally in [-1, 1]; the range is not checked)
+    \param max_deg  Highest polynomial degree to compute
+    \param T        Output: T[n] = T_n(x)  for n = 0 .. max_deg  (size >= max_deg+1)
+    \param dT       Output: dT[n] = T'_n(x) for n = 0 .. max_deg (size >= max_deg+1)
+*/
+inline void chebEvaluate(Scalar x, unsigned int max_deg, Scalar* T, Scalar* dT)
+    {
+    T[0] = Scalar(1);
+    dT[0] = Scalar(0);
+
+    if (max_deg == 0)
+        return; // index 1 may not exist in the caller's buffers
+
+    T[1] = x;
+    dT[1] = Scalar(1);
+
+    const Scalar two_x = Scalar(2) * x;
+    for (unsigned int n = 1; n < max_deg; ++n)
+        {
+        T[n + 1] = two_x * T[n] - T[n - 1];
+        dT[n + 1] = Scalar(2) * T[n] + two_x * dT[n] - dT[n - 1]; // derivative of the line above
+        }
+    }
+
+//! Chebyshev anisotropic pair potential, templated on a symmetry reducer.
+/*!
+ * \tparam ShapeSymmetryT A class providing a static \c domain_upper[5] array
+ *         and a static \c reduce(theta, phi, alpha, beta, gamma) method that
+ *         maps the angles into a fundamental domain and returns the applied
+ *         rotation as a quaternion.  See \c ShapeSymmetry.h.
+ */
+template<class ShapeSymmetryT>
 class PYBIND11_EXPORT ChebyshevAnisotropicPairPotential : public ForceCompute
     {
     public:
-    //! Constructor
     ChebyshevAnisotropicPairPotential(std::shared_ptr<SystemDefinition> sysdef,
                                       std::shared_ptr<hoomd::md::NeighborList> nlist,
                                       const Scalar* domain,
@@ -44,7 +98,6 @@ class PYBIND11_EXPORT ChebyshevAnisotropicPairPotential : public ForceCompute
                                       unsigned int Nterms,
                                       const Scalar* r0_data,
                                       const unsigned int* r0_shape);
-
     //! Destructor
     virtual ~ChebyshevAnisotropicPairPotential();
 
@@ -80,17 +133,13 @@ class PYBIND11_EXPORT ChebyshevAnisotropicPairPotential : public ForceCompute
 
     std::shared_ptr<hoomd::md::NeighborList> m_nlist; //!< Neighbor list
 
-    GPUArray<Scalar2> m_domain; //!< Approximation domain (5x2): 5 rows, each is (min, max)
-
-    Scalar m_r_cut; //!< Cut-off distance in approximation domain
+    GPUArray<Scalar2> m_domain; //!< Approximation domain (5x2): 5 rows, each (min, max)
 
-    Scalar m_nlist_r_cut; //!< Effective neighbor-list cutoff = ceil(max(r0_data) + r_cut)
+    Scalar m_r_cut;       //!< Cut-off distance in approximation domain
+    Scalar m_nlist_r_cut; //!< Neighbor-list cutoff = ceil(max(r0) + r_cut)
 
-    /// r_cut matrix shared with the neighbor list (subscriber pattern)
-    std::shared_ptr<GPUArray<Scalar>> m_r_cut_nlist;
-
-    /// Track whether we have attached to the Simulation object
-    bool m_attached = true;
+    std::shared_ptr<GPUArray<Scalar>> m_r_cut_nlist; //!< r_cut matrix shared with nlist
+    bool m_attached = true;                          //!< Whether attached to the simulation
 
     GPUArray<unsigned int> m_terms; //!< Chebyshev term list (Nterms x 6)
     GPUArray<Scalar> m_coeffs;      //!< Coefficients corresponding to each term
@@ -99,12 +148,570 @@ class PYBIND11_EXPORT ChebyshevAnisotropicPairPotential : public ForceCompute
     GPUArray<Scalar> m_r0_data;        //!< R0 data
     GPUArray<unsigned int> m_r0_shape; //!< Points per dimension to sample r0
 
-    // methods
-
     void computeForces(uint64_t timestep) override;
     };
 
-    } // end namespace azplugins
-    } // end namespace hoomd
+// Constructor: copy the caller's raw arrays into GPUArrays and register an r_cut with the nlist
+template<class ShapeSymmetryT>
+ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::ChebyshevAnisotropicPairPotential(
+    std::shared_ptr<SystemDefinition> sysdef,
+    std::shared_ptr<hoomd::md::NeighborList> nlist,
+    const Scalar* domain, // 10 values, read as 5 consecutive (min, max) pairs
+    const Scalar r_cut,
+    const unsigned int* terms, // Nterms * 6 values
+    const Scalar* coeffs, // Nterms values
+    unsigned int Nterms,
+    const Scalar* r0_data, // product(r0_shape) values
+    const unsigned int* r0_shape) // 5 values
+    : ForceCompute(sysdef), m_nlist(nlist), m_r_cut(r_cut), m_Nterms(Nterms)
+    {
+        {
+        GPUArray<Scalar2> domain_arr(5, m_exec_conf);
+        m_domain.swap(domain_arr);
+
+        ArrayHandle<Scalar2> h_domain(m_domain, access_location::host, access_mode::readwrite);
+        for (unsigned int d = 0; d < 5; ++d)
+            {
+            h_domain.data[d] = make_scalar2(domain[2 * d], domain[2 * d + 1]); // x = min, y = max
+            }
+        }
+
+        // terms: shape (Nterms, 6), stored flat (row t occupies entries 6t .. 6t+5)
+        {
+        GPUArray<unsigned int> terms_arr(static_cast<size_t>(Nterms) * 6, m_exec_conf);
+        m_terms.swap(terms_arr);
+
+        ArrayHandle<unsigned int> h_terms(m_terms, access_location::host, access_mode::readwrite);
+        std::copy(terms, terms + static_cast<size_t>(Nterms) * 6, h_terms.data);
+        }
+
+        // coeffs: shape (Nterms,), one coefficient per row of terms
+        {
+        GPUArray<Scalar> coeffs_arr(Nterms, m_exec_conf);
+        m_coeffs.swap(coeffs_arr);
+
+        ArrayHandle<Scalar> h_coeffs(m_coeffs, access_location::host, access_mode::readwrite);
+        std::copy(coeffs, coeffs + Nterms, h_coeffs.data);
+        }
+
+        // r0_shape: length 5 (number of r0 samples along each angular coordinate)
+        {
+        GPUArray<unsigned int> shape_arr(5, m_exec_conf);
+        m_r0_shape.swap(shape_arr);
+
+        ArrayHandle<unsigned int> h_shape(m_r0_shape,
+                                          access_location::host,
+                                          access_mode::readwrite);
+        std::copy(r0_shape, r0_shape + 5, h_shape.data);
+        }
+
+    // r0_data: flat array, length = product(r0_shape)
+    unsigned int n_r0 = 1; // NOTE(review): 32-bit product, would wrap for very large tables
+    for (unsigned int d = 0; d < 5; ++d)
+        {
+        n_r0 *= r0_shape[d];
+        }
+
+        {
+        GPUArray<Scalar> r0_arr(n_r0, m_exec_conf);
+        m_r0_data.swap(r0_arr);
+
+        ArrayHandle<Scalar> h_r0(m_r0_data, access_location::host, access_mode::readwrite);
+        std::copy(r0_data, r0_data + n_r0, h_r0.data);
+        }
+
+    // neighbor list subscriber: request a cutoff of ceil(max(r0) + r_cut) from the nlist
+    Scalar max_r0 = *std::max_element(r0_data, r0_data + n_r0);
+    m_nlist_r_cut = std::ceil(max_r0 + m_r_cut);
+
+    m_r_cut_nlist = std::make_shared<GPUArray<Scalar>>(1, m_exec_conf); // NOTE(review): 1 entry - presumably one particle type; confirm
+        {
+        ArrayHandle<Scalar> h_r_cut_nlist(*m_r_cut_nlist,
+                                          access_location::host,
+                                          access_mode::overwrite);
+        h_r_cut_nlist.data[0] = m_nlist_r_cut;
+        }
+    m_nlist->addRCutMatrix(m_r_cut_nlist);
+    m_nlist->notifyRCutMatrixChange();
+    }
+
+// Destructor: withdraw the r_cut matrix from the neighbor list unless notifyDetach() already did
+template<class ShapeSymmetryT>
+ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::~ChebyshevAnisotropicPairPotential()
+    {
+    if (m_attached)
+        {
+        m_nlist->removeRCutMatrix(m_r_cut_nlist);
+        }
+    }
+
+// notifyDetach: remove the r_cut matrix from the neighbor list (at most once) and mark as detached
+template<class ShapeSymmetryT>
+void ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::notifyDetach()
+    {
+    if (m_attached)
+        {
+        m_nlist->removeRCutMatrix(m_r_cut_nlist);
+        }
+    m_attached = false; // a later destructor call then skips removeRCutMatrix
+    }
+
+// computeForces: host loop over neighbor pairs accumulating force, torque and pair energy
+template<class ShapeSymmetryT>
+void ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::computeForces(uint64_t timestep)
+    {
+    // start by updating the neighborlist
+    m_nlist->compute(timestep);
+
+    // check neighbor list storage mode (half: each pair is stored once, so j is updated too)
+    const bool third_law = (m_nlist->getStorageMode() == hoomd::md::NeighborList::half);
+    // access neighbor list, particle data, and simulation box
+    ArrayHandle<unsigned int> h_n_neigh(m_nlist->getNNeighArray(),
+                                        access_location::host,
+                                        access_mode::read);
+    ArrayHandle<unsigned int> h_nlist(m_nlist->getNListArray(),
+                                      access_location::host,
+                                      access_mode::read);
+    ArrayHandle<size_t> h_head_list(m_nlist->getHeadList(),
+                                    access_location::host,
+                                    access_mode::read);
+    ArrayHandle<Scalar4> h_pos(m_pdata->getPositions(), access_location::host, access_mode::read);
+    ArrayHandle<Scalar4> h_orientation(m_pdata->getOrientationArray(),
+                                       access_location::host,
+                                       access_mode::read);
+    ArrayHandle<Scalar2> h_domain(m_domain, access_location::host, access_mode::read);
+    ArrayHandle<Scalar> h_r0_data(m_r0_data, access_location::host, access_mode::read);
+    ArrayHandle<unsigned int> h_r0_shape(m_r0_shape, access_location::host, access_mode::read);
+    ArrayHandle<unsigned int> h_terms(m_terms, access_location::host, access_mode::read);
+    ArrayHandle<Scalar> h_coeffs(m_coeffs, access_location::host, access_mode::read);
+
+    const BoxDim box = m_pdata->getGlobalBox();
+    const Scalar nlist_rcutsq = m_nlist_r_cut * m_nlist_r_cut;
+    const Scalar fd_step = Scalar(1.0e-6); // step handed to interp.valueAndDerivatives below
+
+    LinearInterpolator5D<Scalar> interp(h_r0_data.data, h_r0_shape.data, h_domain.data);
+
+    // Determine the maximum Chebyshev degree needed for each of the 6 coordinates
+    unsigned int max_deg[6] = {0, 0, 0, 0, 0, 0};
+    for (unsigned int t = 0; t < m_Nterms; ++t)
+        {
+        for (unsigned int c = 0; c < 6; ++c)
+            {
+            const unsigned int deg = h_terms.data[t * 6 + c];
+            if (deg > max_deg[c])
+                max_deg[c] = deg;
+            }
+        }
+
+    // Chain-rule scale factors: d(x_scaled)/d(x) = 2 / (hi - lo)
+    Scalar cheb_scale[6];
+    cheb_scale[0] = Scalar(2); // rho is scaled from [0, 1], i.e. 2 / (1 - 0)
+    for (unsigned int d = 0; d < 5; ++d)
+        {
+        cheb_scale[d + 1] = Scalar(2) / (h_domain.data[d].y - h_domain.data[d].x);
+        }
+
+    // Flat Chebyshev storage shared by all 6 coordinates, addressed as cheb_idx(degree, coord)
+    unsigned int max_deg_global = 0;
+    for (unsigned int c = 0; c < 6; ++c)
+        {
+        if (max_deg[c] > max_deg_global)
+            max_deg_global = max_deg[c];
+        }
+
+    const Index2D cheb_idx(max_deg_global + 1, 6);
+    std::vector<Scalar> cheb_T_flat(cheb_idx.getNumElements());
+    std::vector<Scalar> cheb_dT_flat(cheb_idx.getNumElements());
+
+    m_force.zeroFill();
+    m_torque.zeroFill();
+
+    ArrayHandle<Scalar4> h_force(m_force, access_location::host, access_mode::readwrite);
+    ArrayHandle<Scalar4> h_torque(m_torque, access_location::host, access_mode::readwrite);
+
+    const unsigned int N = m_pdata->getN();
+
+    //! Euler-angle singularity tolerance for the alpha/gamma extraction.
+    const Scalar euler_singularity_tol = Scalar(1e-7);
+
+    //! Clamp threshold for beta and phi in the Jacobian (avoids the 1/sin singularity).
+    const Scalar beta_tol = Scalar(1e-5);
+
+    for (unsigned int i = 0; i < N; ++i)
+        {
+        // Particle i position and orientation
+        const Scalar3 pos_i = make_scalar3(h_pos.data[i].x, h_pos.data[i].y, h_pos.data[i].z);
+        const quat<Scalar> q_i(h_orientation.data[i]);
+        const quat<Scalar> q_i_conj = conj(q_i);
+
+        // Initialize particle force, torque, and energy
+        Scalar3 fi = make_scalar3(0, 0, 0);
+        Scalar3 ti = make_scalar3(0, 0, 0);
+        Scalar pei = Scalar(0);
+
+        const size_t myHead = h_head_list.data[i];
+        const unsigned int size = (unsigned int)h_n_neigh.data[i];
+
+        for (unsigned int k = 0; k < size; ++k)
+            {
+            // Access the index of the k-th neighbor of particle i
+            const unsigned int j = h_nlist.data[myHead + k];
+            assert(j < m_pdata->getN() + m_pdata->getNGhosts());
+
+            const Scalar3 pos_j = make_scalar3(h_pos.data[j].x, h_pos.data[j].y, h_pos.data[j].z);
+            Scalar3 dx = pos_i - pos_j;
+            // Apply periodic boundary conditions
+            dx = box.minImage(dx);
+            // Neighbor-list cutoff check (center-center distance).
+            const Scalar rsq = dot(dx, dx);
+            if (rsq > nlist_rcutsq)
+                {
+                continue;
+                }
+
+            // particle j, orientation quaternion
+            const quat<Scalar> q_j(h_orientation.data[j]);
+            // dx is in lab frame, so rotate dx by conj(q_i)
+            const vec3<Scalar> dx_body = rotate(q_i_conj, vec3<Scalar>(dx));
+
+            // Relative orientation of j with respect to i:
+            //     q_rel = conj(q_i) * q_j
+            // ref:
+            // https://www.mathworks.com/help/fusion/ug/rotations-orientation-and-quaternions.html
+            const quat<Scalar> q_rel = q_i_conj * q_j;
+
+            // Convert position to spherical coordinates (theta: azimuth, phi: polar angle).
+            // Skip overlapping particles, for which the angles are undefined.
+            const Scalar r = fast::sqrt(dot(dx_body, dx_body));
+            if (r < Scalar(1e-12))
+                {
+                continue;
+                }
+
+            Scalar theta = std::atan2(dx_body.y, dx_body.x);
+            if (theta < Scalar(0))
+                theta += Scalar(2.0) * M_PI; // wrap into [0, 2 pi)
+
+            Scalar cosphi = dx_body.z / r;
+            if (cosphi < Scalar(-1))
+                cosphi = Scalar(-1);
+            else if (cosphi > Scalar(1))
+                cosphi = Scalar(1);
+            Scalar phi = slow::acos(cosphi);
+
+            // Build rotation matrix from the relative quaternion and extract
+            // ZXZ Euler angles (alpha, beta, gamma)
+            const rotmat3<Scalar> R(q_rel);
+
+            Scalar alpha = Scalar(0);
+            Scalar beta = Scalar(0);
+            Scalar gamma = Scalar(0);
+
+            Scalar clamped_r22 = R.row2.z; // clamp guards acos against round-off outside [-1, 1]
+            if (clamped_r22 < Scalar(-1))
+                clamped_r22 = Scalar(-1);
+            else if (clamped_r22 > Scalar(1))
+                clamped_r22 = Scalar(1);
+            beta = slow::acos(clamped_r22);
+
+            if (beta > euler_singularity_tol && beta < Scalar(M_PI) - euler_singularity_tol)
+                {
+                alpha = std::atan2(R.row0.z, -R.row1.z);
+                gamma = std::atan2(R.row2.x, R.row2.y);
+                if (alpha < Scalar(0))
+                    alpha += Scalar(2) * M_PI;
+                }
+            else
+                {
+                alpha = Scalar(0); // near beta = 0 or pi: alpha is fixed to 0, gamma carries the rotation
+                gamma
+                    = std::atan2((beta <= euler_singularity_tol) ? R.row0.y : -R.row0.y, R.row0.x);
+                }
+
+            if (gamma < Scalar(0))
+                gamma += Scalar(2) * M_PI;
+
+            // Symmetry reduction
+            // The symmetry evaluator maps (theta, phi, alpha, beta, gamma)
+            // into the reduced domain (modifying them in place) and returns the
+            // cumulative quaternion rotation that was applied.  We keep the
+            // transformation so that forces and torques can be rotated back to
+            // the original frame at the end.
+            const quat<Scalar> sym_transformation
+                = ShapeSymmetryT::reduce(theta, phi, alpha, beta, gamma);
+
+            // Clamp beta and phi away from 0 and pi to avoid the 1/sin(beta) and
+            // 1/sin(phi) singularities in the Jacobian (phi reuses beta_tol).
+            if (beta < beta_tol)
+                beta = beta_tol;
+            else if (beta > Scalar(M_PI) - beta_tol)
+                beta = Scalar(M_PI) - beta_tol;
+
+            if (phi < beta_tol)
+                phi = beta_tol;
+            else if (phi > Scalar(M_PI) - beta_tol)
+                phi = Scalar(M_PI) - beta_tol;
+
+            // Compute r0 and all 5 derivatives, ordered (theta, phi, alpha, beta, gamma)
+            Scalar r0;
+            Scalar dr0[5];
+            interp.valueAndDerivatives(theta, phi, alpha, beta, gamma, fd_step, r0, dr0);
+            const Scalar dr0_dtheta = dr0[0];
+            const Scalar dr0_dphi = dr0[1];
+            const Scalar dr0_dalpha = dr0[2];
+            const Scalar dr0_dbeta = dr0[3];
+            const Scalar dr0_dgamma = dr0[4];
+
+            // Compute rho = (1/r - 1/r0) / (1/(r0 + r_cut) - 1/r0): 0 at r = r0, 1 at r = r0 + r_cut
+            const Scalar inv_r = Scalar(1) / r;
+            const Scalar inv_r0 = Scalar(1) / r0;
+            const Scalar inv_r0_rcut = Scalar(1) / (r0 + m_r_cut);
+            const Scalar rho_denom = inv_r0_rcut - inv_r0;
+            const Scalar rho_num = inv_r - inv_r0;
+            Scalar rho = rho_num / rho_denom;
+
+            if (rho > Scalar(1))
+                continue; // outside the approximation domain: no contribution from this pair
+
+            // Save raw rho for energy extrapolation if rho < 0; the polynomial uses rho clamped to 0
+            const Scalar rho_energy = rho;
+            if (rho < Scalar(0))
+                rho = Scalar(0);
+
+            // Compute drho/dr and drho/dr0
+            const Scalar inv_r_sq = inv_r * inv_r;
+            const Scalar inv_r0_sq = inv_r0 * inv_r0;
+            const Scalar inv_r0_rcut_sq = inv_r0_rcut * inv_r0_rcut;
+            const Scalar rho_denom_sq = rho_denom * rho_denom;
+
+            const Scalar drho_dr = -inv_r_sq / rho_denom;
+            const Scalar drho_dr0
+                = (inv_r0_sq * rho_denom - rho_num * (inv_r0_sq - inv_r0_rcut_sq)) / rho_denom_sq;
+
+            // Chebyshev evaluation: scale each coordinate to [-1,1]
+            // and evaluate polynomials + derivatives up to max degree.
+            chebEvaluate(chebScale(rho, Scalar(0), Scalar(1)),
+                         max_deg[0],
+                         cheb_T_flat.data() + cheb_idx(0, 0),
+                         cheb_dT_flat.data() + cheb_idx(0, 0));
+
+            const Scalar ang_coords[5] = {theta, phi, alpha, beta, gamma};
+            for (unsigned int c = 0; c < 5; ++c)
+                {
+                chebEvaluate(chebScale(ang_coords[c], h_domain.data[c].x, h_domain.data[c].y),
+                             max_deg[c + 1],
+                             cheb_T_flat.data() + cheb_idx(0, c + 1),
+                             cheb_dT_flat.data() + cheb_idx(0, c + 1));
+                }
+
+            // Evaluate u and du/d(coord_k), coord order: rho, theta, phi, alpha, beta, gamma
+            Scalar u = Scalar(0);
+            Scalar du[6] = {Scalar(0), Scalar(0), Scalar(0), Scalar(0), Scalar(0), Scalar(0)};
+
+            for (unsigned int t = 0; t < m_Nterms; ++t)
+                {
+                const unsigned int* degs = h_terms.data + 6 * t;
+                const Scalar coeff = h_coeffs.data[t];
+
+                Scalar T_vals[6];
+                Scalar dT_vals[6];
+                for (unsigned int c = 0; c < 6; ++c)
+                    {
+                    T_vals[c] = cheb_T_flat[cheb_idx(degs[c], c)];
+                    dT_vals[c] = cheb_dT_flat[cheb_idx(degs[c], c)];
+                    }
+
+                Scalar prefix[7]; // prefix[c] = product of T_vals[0 .. c-1]
+                prefix[0] = Scalar(1);
+                for (unsigned int c = 0; c < 6; ++c)
+                    prefix[c + 1] = prefix[c] * T_vals[c];
+
+                Scalar suffix[7]; // suffix[c] = product of T_vals[c .. 5]
+                suffix[6] = Scalar(1);
+                for (int c = 5; c >= 0; --c)
+                    suffix[c] = suffix[c + 1] * T_vals[c];
+
+                u += coeff * prefix[6];
+
+                for (unsigned int c = 0; c < 6; ++c) // product rule: differentiate factor c only
+                    du[c] += coeff * dT_vals[c] * cheb_scale[c] * prefix[c] * suffix[c + 1];
+                }
+
+            // Linear extrapolation for energy when rho < 0 (u and du[0] were evaluated at rho = 0)
+            const Scalar u_energy = (rho_energy < Scalar(0)) ? (u + rho_energy * du[0]) : u;
+
+            // Jacobian matrix J (6x6).
+            // J maps the potential-derivative vector
+            // [du/drho, du/dtheta, du/dphi, du/dalpha, du/dbeta, du/dgamma]
+            // to the lab-frame force and torque:
+            // [F_x, F_y, F_z, tau_x, tau_y, tau_z]
+            Scalar s_th, c_th;
+            fast::sincos(theta, s_th, c_th);
+            Scalar s_ph, c_ph;
+            fast::sincos(phi, s_ph, c_ph);
+            Scalar s_b, c_b;
+            fast::sincos(beta, s_b, c_b);
+            Scalar s_a, c_a;
+            fast::sincos(alpha, s_a, c_a);
+
+            const Scalar inv_r_s_ph = inv_r / s_ph;
+            const Scalar inv_s_b = Scalar(1) / s_b;
+
+            const Scalar A = drho_dr0 * dr0_dtheta * inv_r_s_ph;
+            const Scalar B = drho_dr0 * dr0_dphi * inv_r;
+            const Scalar C = drho_dr0 * dr0_dalpha * inv_s_b;
+            const Scalar D = drho_dr0 * dr0_dgamma * inv_s_b;
+
+            const Scalar f_x_red = (-c_th * s_ph * drho_dr + s_th * A - c_th * c_ph * B) * du[0]
+                                   + (s_th * inv_r_s_ph) * du[1] + (-c_th * c_ph * inv_r) * du[2];
+
+            const Scalar f_y_red = (-s_th * s_ph * drho_dr - c_th * A - s_th * c_ph * B) * du[0]
+                                   + (-c_th * inv_r_s_ph) * du[1] + (-s_th * c_ph * inv_r) * du[2];
+
+            const Scalar f_z_red = (-c_ph * drho_dr + s_ph * B) * du[0] + (s_ph * inv_r) * du[2];
+
+            const Scalar tau_x_red = (c_b * s_a * C - c_a * drho_dr0 * dr0_dbeta - s_a * D) * du[0]
+                                     + (c_b * s_a * inv_s_b) * du[3] + (-c_a) * du[4]
+                                     + (-s_a * inv_s_b) * du[5];
+
+            const Scalar tau_y_red = (-c_b * c_a * C - s_a * drho_dr0 * dr0_dbeta + c_a * D) * du[0]
+                                     + (-c_a * c_b * inv_s_b) * du[3] + (-s_a) * du[4]
+                                     + (c_a * inv_s_b) * du[5];
+
+            const Scalar tau_z_red = (-drho_dr0 * dr0_dalpha) * du[0] + (-Scalar(1)) * du[3];
+
+            // Rotate back by conj(sym_transformation). NOTE(review): q_i is not applied - confirm frame
+            const quat<Scalar> sym_inv = conj(sym_transformation);
+            const vec3<Scalar> f_red(f_x_red, f_y_red, f_z_red);
+            const vec3<Scalar> tau_red(tau_x_red, tau_y_red, tau_z_red);
+            const vec3<Scalar> f_lab = rotate(sym_inv, f_red);
+            const vec3<Scalar> tau_lab = rotate(sym_inv, tau_red);
+
+            const Scalar f_x = f_lab.x;
+            const Scalar f_y = f_lab.y;
+            const Scalar f_z = f_lab.z;
+            const Scalar tau_x = tau_lab.x;
+            const Scalar tau_y = tau_lab.y;
+            const Scalar tau_z = tau_lab.z;
+
+            // Accumulate for particle i
+            fi.x += f_x;
+            fi.y += f_y;
+            fi.z += f_z;
+
+            ti.x += tau_x;
+            ti.y += tau_y;
+            ti.z += tau_z;
+
+            pei += u_energy;
+
+            // Newton's third law for half neighbor list. NOTE(review): confirm tau_j = -tau_i is intended
+            if (third_law)
+                {
+                h_force.data[j].x -= f_x;
+                h_force.data[j].y -= f_y;
+                h_force.data[j].z -= f_z;
+                h_force.data[j].w += Scalar(0.5) * u_energy; // j's half of the pair energy
+
+                h_torque.data[j].x -= tau_x;
+                h_torque.data[j].y -= tau_y;
+                h_torque.data[j].z -= tau_z;
+                }
+            }
+
+        h_force.data[i].x += fi.x;
+        h_force.data[i].y += fi.y;
+        h_force.data[i].z += fi.z;
+        h_force.data[i].w += Scalar(0.5) * pei; // i's half of each pair energy
+
+        h_torque.data[i].x += ti.x;
+        h_torque.data[i].y += ti.y;
+        h_torque.data[i].z += ti.z;
+        h_torque.data[i].w += Scalar(0.0); // adds zero: w is left unchanged
+        }
+    }
+
+namespace detail
+    {
+
+//! Export one instantiation of ChebyshevAnisotropicPairPotential to Python.
+/*!
+ * \param m    pybind11 module that receives the class.
+ * \param name Name the class should have in the Python module (must be
+ *             unique per symmetry). Array shapes are validated before construction.
+ * \tparam ShapeSymmetryT Symmetry evaluator type.
+ */
+template<class ShapeSymmetryT>
+void export_ChebyshevAnisotropicPairPotential(pybind11::module& m, const std::string& name)
+    {
+    namespace py = pybind11; // NOTE(review): py::array_t needs <pybind11/numpy.h>; not included directly here
+    using NL = hoomd::md::NeighborList;
+    using Pot = ChebyshevAnisotropicPairPotential<ShapeSymmetryT>;
+
+    py::class_<Pot, ForceCompute, std::shared_ptr<Pot>>(m, name.c_str())
+        .def(py::init(
+            [](std::shared_ptr<SystemDefinition> sysdef,
+               std::shared_ptr<NL> nlist,
+               py::array_t<Scalar, py::array::c_style | py::array::forcecast> domain,
+               Scalar r_cut,
+               py::array_t<unsigned int, py::array::c_style | py::array::forcecast> terms,
+               py::array_t<Scalar, py::array::c_style | py::array::forcecast> coeffs,
+               py::array_t<Scalar, py::array::c_style | py::array::forcecast> r0_data)
+            {
+                // domain must be (5,2): one (min, max) row per angle; rho has no row
+                if (domain.ndim() != 2 || domain.shape(0) != 5 || domain.shape(1) != 2)
+                    {
+                    throw std::runtime_error("domain must have shape (5,2).");
+                    }
+
+                // Terms must be (Nterms,6): one degree per coordinate in each row
+                if (terms.ndim() != 2 || terms.shape(1) != 6)
+                    {
+                    throw std::runtime_error("terms must have shape (Nterms,6).");
+                    }
+
+                const unsigned int Nterms = static_cast<unsigned int>(terms.shape(0));
+
+                // Coeffs must be (Nterms,): one coefficient per row of terms
+                if (coeffs.ndim() != 1 || static_cast<unsigned int>(coeffs.shape(0)) != Nterms)
+                    {
+                    throw std::runtime_error("coeffs must have shape (Nterms,).");
+                    }
+
+                // r0_data must be 5D
+                if (r0_data.ndim() != 5)
+                    {
+                    throw std::runtime_error("r0_data must be a 5D array.");
+                    }
+
+                // Infer r0_shape from r0_data.shape; every axis needs at least 2 samples
+                std::array<unsigned int, 5> r0_shape;
+                for (unsigned int k = 0; k < 5; ++k)
+                    {
+                    const auto dim = r0_data.shape(k);
+                    if (dim < 2)
+                        {
+                        throw std::runtime_error("r0_data has invalid dimension(s).");
+                        }
+                    r0_shape[k] = static_cast<unsigned int>(dim);
+                    }
+
+                return std::make_shared<Pot>(sysdef, // the constructor copies every array it is given
+                                             nlist,
+                                             domain.data(),
+                                             r_cut,
+                                             terms.data(),
+                                             coeffs.data(),
+                                             Nterms,
+                                             r0_data.data(),
+                                             r0_shape.data());
+            }))
+        .def_property_readonly("r_cut", &Pot::getRCut)
+        .def_property_readonly("num_terms", &Pot::getNTerms);
+    }
+
+    } // end namespace detail
+    } // namespace azplugins
+    } // namespace hoomd
 
 #endif // AZPLUGINS_CHEBYSHEV_ANISOTROPIC_PAIR_POTENTIAL_H_
diff --git a/src/ShapeSymmetry.h b/src/ShapeSymmetry.h
index 4623e02..248daa1 100644
--- a/src/ShapeSymmetry.h
+++ b/src/ShapeSymmetry.h
@@ -101,7 +101,7 @@ eulerFromQuat(const quat<Scalar>& q, Scalar& alpha, Scalar& beta, Scalar& gamma)
         }
     else
         {
-        beta = std::acos(R.row2.z);
+        beta = slow::acos(R.row2.z);
         }
 
     if (beta > tol && beta < Scalar(M_PI) - tol)
@@ -140,8 +140,6 @@ class ShapeSymmetryNull
     static constexpr Scalar domain_upper[5]
         = {Scalar(2.0 * M_PI), Scalar(M_PI), Scalar(2.0 * M_PI), Scalar(M_PI), Scalar(2.0 * M_PI)};
 
-    AZPLUGINS_HOSTDEVICE ShapeSymmetryNull() { }
-
 #ifndef __HIPCC__
     static std::string getName()
         {
@@ -149,11 +147,11 @@ class ShapeSymmetryNull
         }
 #endif
 
-    AZPLUGINS_HOSTDEVICE quat<Scalar> reduce(Scalar& /*theta*/,
-                                             Scalar& /*phi*/,
-                                             Scalar& /*alpha*/,
-                                             Scalar& /*beta*/,
-                                             Scalar& /*gamma*/) const
+    AZPLUGINS_HOSTDEVICE static quat<Scalar> reduce(Scalar& /*theta*/,
+                                                    Scalar& /*phi*/,
+                                                    Scalar& /*alpha*/,
+                                                    Scalar& /*beta*/,
+                                                    Scalar& /*gamma*/)
         {
         return quat<Scalar>(Scalar(1), vec3<Scalar>(0, 0, 0));
         }
@@ -174,12 +172,6 @@ class ShapeSymmetryCube
                                                Scalar(0.9553166181245093),
                                                Scalar(M_PI / 2.0)};
 
-    AZPLUGINS_HOSTDEVICE ShapeSymmetryCube()
-        : m_rot_x_pi(detail::quatFromAxisAngle(vec3<Scalar>(1, 0, 0), Scalar(M_PI))),
-          m_rot_111(quat<Scalar>(Scalar(0.5), vec3<Scalar>(Scalar(0.5), Scalar(0.5), Scalar(0.5))))
-        {
-        }
-
 #ifndef __HIPCC__
     static std::string getName()
         {
@@ -187,21 +179,28 @@ class ShapeSymmetryCube
         }
 #endif
 
-    AZPLUGINS_HOSTDEVICE quat<Scalar>
-    reduce(Scalar& theta, Scalar& phi, Scalar& alpha, Scalar& beta, Scalar& gamma) const
+    AZPLUGINS_HOSTDEVICE static quat<Scalar>
+    reduce(Scalar& theta, Scalar& phi, Scalar& alpha, Scalar& beta, Scalar& gamma)
         {
+        // Rotation by pi around x
+        const quat<Scalar> rot_x_pi
+            = detail::quatFromAxisAngle(vec3<Scalar>(1, 0, 0), Scalar(M_PI));
+        // Rotation by 2 pi/3 around [1,1,1]/sqrt(3)
+        const quat<Scalar> rot_111(Scalar(0.5),
+                                   vec3<Scalar>(Scalar(0.5), Scalar(0.5), Scalar(0.5)));
+
         quat<Scalar> transformation(Scalar(1), vec3<Scalar>(0, 0, 0));
 
         vec3<Scalar> pos = detail::sphericalToCartesian(Scalar(1), theta, phi);
 
-        // if phi > pi/2, rotate by pi around x to flip z.
+        // If phi > pi/2, rotate by pi around x to flip z.
         if (phi > Scalar(M_PI) / Scalar(2))
             {
-            pos = rotate(m_rot_x_pi, pos);
-            transformation = m_rot_x_pi * transformation;
+            pos = rotate(rot_x_pi, pos);
+            transformation = rot_x_pi * transformation;
             }
 
-        // fold theta into [0, pi/2] by rotating around z.
+        // Fold theta into [0, pi/2] by rotating around z.
         Scalar r_tmp, th_tmp, ph_tmp;
         detail::cartesianToSpherical(pos, r_tmp, th_tmp, ph_tmp);
 
@@ -214,15 +213,15 @@ class ShapeSymmetryCube
             transformation = rot_z * transformation;
             }
 
-        // fold theta into [0, pi/4] using 120-degree rotations
-        // around the [111] body diagonal (up to 3 attempts).
+        // Fold theta into [0, pi/4] using 120-degree rotations around the
+        // [111] body diagonal (up to 3 attempts).
         const Scalar theta_max = Scalar(M_PI) / Scalar(4);
         unsigned int n_rot = 0;
         detail::cartesianToSpherical(pos, r_tmp, th_tmp, ph_tmp);
         while (n_rot < 3 && th_tmp > theta_max)
             {
-            pos = rotate(m_rot_111, pos);
-            transformation = m_rot_111 * transformation;
+            pos = rotate(rot_111, pos);
+            transformation = rot_111 * transformation;
             detail::cartesianToSpherical(pos, r_tmp, th_tmp, ph_tmp);
             ++n_rot;
             }
@@ -230,7 +229,7 @@ class ShapeSymmetryCube
         // Write back reduced position angles.
         detail::cartesianToSpherical(pos, r_tmp, theta, phi);
 
-        // apply cumulative transformation to orientation and
+        // Apply cumulative transformation to orientation and
         // select the best candidate from 3 rotations around [111].
         quat<Scalar> q_orient = detail::quatFromEulerZXZ(alpha, beta, gamma);
         quat<Scalar> q_cand = transformation * q_orient;
@@ -244,7 +243,7 @@ class ShapeSymmetryCube
             Scalar a, b, g;
             detail::eulerFromQuat(q_cand, a, b, g);
 
-            // if > pi/2, reflect.
+            // If beta > pi/2, reflect.
             if (b > Scalar(M_PI) / Scalar(2))
                 {
                 a = std::fmod(a + Scalar(M_PI), Scalar(2) * Scalar(M_PI));
@@ -265,8 +264,7 @@ class ShapeSymmetryCube
                 best_g = g;
                 }
 
-            // Rotate candidate to the next [111] orientation.
-            q_cand = q_cand * m_rot_111;
+            q_cand = q_cand * rot_111;
             }
 
         alpha = best_a;
@@ -275,10 +273,6 @@ class ShapeSymmetryCube
 
         return transformation;
         }
-
-    private:
-    quat<Scalar> m_rot_x_pi; //!< Rotation by pi around x
-    quat<Scalar> m_rot_111;  //!< Rotation by 2 pi/3 around [1,1,1]/sqrt(3)
     };
 
 //! Tetrahedron symmetry evaluator.
@@ -289,15 +283,13 @@ class ShapeSymmetryCube
 class ShapeSymmetryTetrahedron
     {
     public:
-    //! Upper bounds of the domain.
+    //! Upper bounds of the reduced domain.
     static constexpr Scalar domain_upper[5] = {Scalar(2.0 * M_PI / 3.0),
                                                Scalar(M_PI),
                                                Scalar(2.0 * M_PI),
                                                Scalar(M_PI),
                                                Scalar(2.0 * M_PI / 3.0)};
 
-    AZPLUGINS_HOSTDEVICE ShapeSymmetryTetrahedron() { }
-
 #ifndef __HIPCC__
     static std::string getName()
         {
@@ -305,14 +297,14 @@ class ShapeSymmetryTetrahedron
         }
 #endif
 
-    AZPLUGINS_HOSTDEVICE quat<Scalar>
-    reduce(Scalar& theta, Scalar& phi, Scalar& alpha, Scalar& beta, Scalar& gamma) const
+    AZPLUGINS_HOSTDEVICE static quat<Scalar>
+    reduce(Scalar& theta, Scalar& phi, Scalar& alpha, Scalar& beta, Scalar& gamma)
         {
         quat<Scalar> transformation(Scalar(1), vec3<Scalar>(0, 0, 0));
 
         const Scalar theta_fold = Scalar(2) * Scalar(M_PI) / Scalar(3);
 
-        // fold theta into [0, 2 pi/3] by rotating around z.
+        // Fold theta into [0, 2 pi/3] by rotating around z.
         if (theta > theta_fold)
             {
             vec3<Scalar> pos = detail::sphericalToCartesian(Scalar(1), theta, phi);
@@ -327,7 +319,7 @@ class ShapeSymmetryTetrahedron
             detail::cartesianToSpherical(pos, r_tmp, theta, phi);
             }
 
-        // apply transformation to orientation, extract Euler angles.
+        // Apply transformation to orientation, extract Euler angles.
         quat<Scalar> q_orient = detail::quatFromEulerZXZ(alpha, beta, gamma);
         quat<Scalar> q_transformed = transformation * q_orient;
         detail::eulerFromQuat(q_transformed, alpha, beta, gamma);
diff --git a/src/export_ChebyshevAnisotropicPairPotential.cc.inc b/src/export_ChebyshevAnisotropicPairPotential.cc.inc
new file mode 100644
index 0000000..4cc6507
--- /dev/null
+++ b/src/export_ChebyshevAnisotropicPairPotential.cc.inc
@@ -0,0 +1,33 @@
+// Copyright (c) 2018-2020, Michael P. Howard
+// Copyright (c) 2021-2025, Auburn University
+// Part of azplugins, released under the BSD 3-Clause License.
+
+// Adapted from hoomd/md/export_PotentialPair.cc.inc of HOOMD-blue.
+// Copyright (c) 2009-2024 The Regents of the University of Michigan.
+// Part of HOOMD-blue, released under the BSD 3-Clause License.
+
+// See CMakeLists.txt for the source of these variables to be processed by CMake's
+// configure_file().
+
+// clang-format off
+#include "ChebyshevAnisotropicPairPotential.h"
+
+#define SYMMETRY_CLASS ShapeSymmetry@_symmetry@
+#define EXPORT_FUNCTION export_ChebyshevAnisotropicPairPotential@_symmetry@
+// clang-format on
+
+namespace hoomd
+    {
+namespace azplugins
+    {
+namespace detail
+    {
+void EXPORT_FUNCTION(pybind11::module& m)
+    {
+    export_ChebyshevAnisotropicPairPotential<SYMMETRY_CLASS>(
+        m,
+        "ChebyshevAnisotropicPairPotential@_symmetry@");
+    }
+    } // end namespace detail
+    } // end namespace azplugins
+    } // end namespace hoomd
diff --git a/src/module.cc b/src/module.cc
index d22f6dd..3ecfcfe 100644
--- a/src/module.cc
+++ b/src/module.cc
@@ -69,7 +69,9 @@ void export_ParabolicFlow(pybind11::module&);
 
 // pair
 void export_AnisoPotentialPairTwoPatchMorse(pybind11::module&);
-void export_ChebyshevAnisotropicPairPotential(pybind11::module&);
+void export_ChebyshevAnisotropicPairPotentialNull(pybind11::module&);
+void export_ChebyshevAnisotropicPairPotentialCube(pybind11::module&);
+void export_ChebyshevAnisotropicPairPotentialTetrahedron(pybind11::module&);
 void export_PotentialPairColloid(pybind11::module&);
 void export_PotentialPairExpandedYukawa(pybind11::module&);
 void export_PotentialPairHertz(pybind11::module&);
@@ -142,7 +144,9 @@ PYBIND11_MODULE(_azplugins, m)
 
     // pair
     export_AnisoPotentialPairTwoPatchMorse(m);
-    export_ChebyshevAnisotropicPairPotential(m);
+    export_ChebyshevAnisotropicPairPotentialNull(m);
+    export_ChebyshevAnisotropicPairPotentialCube(m);
+    export_ChebyshevAnisotropicPairPotentialTetrahedron(m);
     export_PotentialPairColloid(m);
     export_PotentialPairExpandedYukawa(m);
     export_PotentialPairHertz(m);
diff --git a/src/pair.py b/src/pair.py
index 56319f1..bb86a30 100644
--- a/src/pair.py
+++ b/src/pair.py
@@ -14,7 +14,17 @@
 
 
 class ChebyshevAnisotropicPairPotential(Force):
-    """Chebyshev anisotropic pair potential."""
+    """Chebyshev anisotropic pair potential.
+
+    Base class for Chebyshev anisotropic pair potentials.  It corresponds to
+    ``ShapeSymmetryNull`` (no symmetry reduction), so it can be
+    instantiated directly for shapes with no symmetry.  Shape-specific
+    subclasses (e.g. ``ChebyshevAnisotropicPairPotentialCube``) override
+    ``_cpp_class_name`` to select a different compiled C++ symmetry.
+    """
+
+    _ext_module = _azplugins
+    _cpp_class_name = "ChebyshevAnisotropicPairPotentialNull"
 
     def __init__(self, nlist, domain, terms, coeffs, r0, r_cut):
         super().__init__()
@@ -32,9 +42,9 @@ def __init__(self, nlist, domain, terms, coeffs, r0, r_cut):
         self.r0 = numpy.asarray(r0, dtype=numpy.float64)
 
         if self._domain.shape != (5, 2):
-            raise ValueError("domain must have shape (5,2).")
+            raise ValueError("domain must have shape (5, 2).")
         if self._terms.ndim != 2 or self._terms.shape[1] != 6:
-            raise ValueError("terms must have shape (Nterms,6).")
+            raise ValueError("terms must have shape (Nterms, 6).")
 
         n_terms = int(self._terms.shape[0])
         if self._coeffs.ndim != 1 or int(self._coeffs.shape[0]) != n_terms:
@@ -43,10 +53,16 @@ def __init__(self, nlist, domain, terms, coeffs, r0, r_cut):
         if self.r0.ndim != 5:
             raise ValueError("r0 must be a 5D array.")
 
+        if any(dim < 2 for dim in self.r0.shape):
+            raise ValueError(
+                "r0 must have at least 2 grid points along each of its 5 dimensions."
+            )
+
     def _attach_hook(self):
         self._nlist._attach(self._simulation)
 
-        self._cpp_obj = _azplugins.ChebyshevAnisotropicPairPotential(
+        cls = getattr(self._ext_module, self._cpp_class_name)
+        self._cpp_obj = cls(
             self._simulation.state._cpp_sys_def,
             self._nlist._cpp_obj,
             self._domain,
@@ -59,6 +75,28 @@ def _attach_hook(self):
         super()._attach_hook()
 
 
+class ChebyshevAnisotropicPairPotentialCube(ChebyshevAnisotropicPairPotential):
+    """Chebyshev anisotropic pair potential with cube symmetry reduction.
+
+    Reduced domain:
+    theta in [0, pi/4], phi in [0, pi/2], alpha in [0, 2 pi],
+    beta in [0, arccos(1/sqrt(3))], gamma in [0, pi/2].
+    """
+
+    _cpp_class_name = "ChebyshevAnisotropicPairPotentialCube"
+
+
+class ChebyshevAnisotropicPairPotentialTetrahedron(ChebyshevAnisotropicPairPotential):
+    """Chebyshev anisotropic pair potential with tetrahedron symmetry reduction.
+
+    Reduced domain:
+    theta in [0, 2 pi/3], phi in [0, pi], alpha in [0, 2 pi],
+    beta in [0, pi], gamma in [0, 2 pi/3].
+    """
+
+    _cpp_class_name = "ChebyshevAnisotropicPairPotentialTetrahedron"
+
+
 class Colloid(pair.Pair):
     r"""Colloid pair potential.
 
diff --git a/src/pytest/test_chebyshev.py b/src/pytest/test_chebyshev.py
index 8177547..ee459bf 100644
--- a/src/pytest/test_chebyshev.py
+++ b/src/pytest/test_chebyshev.py
@@ -3,11 +3,137 @@
 # Part of azplugins, released under the BSD 3-Clause License.
 
 import numpy
-import hoomd
-import hoomd.azplugins
 from scipy.interpolate import RegularGridInterpolator
 from scipy.spatial.transform import Rotation
 
+import hoomd
+import hoomd.azplugins
+
+import pytest
+
+# Parameters that are identical across every energy/force/torque test.
+rc = 3.0
+phi_min = 1e-5
+beta_min = 1e-5
+
+terms = numpy.array(
+    [
+        [0, 0, 0, 0, 0, 0],
+        [0, 0, 0, 1, 0, 0],
+        [1, 0, 0, 0, 0, 0],
+        [1, 0, 0, 1, 0, 0],
+    ],
+    dtype=numpy.uint32,
+)
+coeffs = numpy.array([1.0, 0.25, 1.5, -1.0], dtype=numpy.float64)
+
+# r0 data: shape (3, 2, 3, 2, 3) = 108 values.
+r0_data = numpy.array([1, 2.1, 3.2] * 36, dtype=numpy.float64).reshape(3, 2, 3, 2, 3)
+
+
+def rho_to_r(rho, r0, rc):
+    """Invert rho = (1/r - 1/r0) / (1/(r0+rc) - 1/r0) to recover r."""
+    inv_r0 = 1.0 / r0
+    inv_r0_rc = 1.0 / (r0 + rc)
+    inv_r = rho * (inv_r0_rc - inv_r0) + inv_r0
+    return 1.0 / inv_r
+
+
+def build_simulation(
+    simulation_factory,
+    two_particle_snapshot_factory,
+    pot_cls,
+    domain,
+    r0,
+    rho,
+    theta,
+    phi,
+    alpha,
+    beta,
+    gamma,
+):
+    """Place two particles at the prescribed coordinates and
+    return the simulation and the attached potential.
+
+    Particle 0 sits at the origin with identity orientation.  Particle 1
+    is placed so that the C++ code sees (rho, theta, phi, alpha, beta,
+    gamma) as the pair's generalised coordinates.  For Null symmetry the
+    caller supplies ``r0`` from the test's own interpolator; for Cube /
+    Tetrahedron tests the caller supplies the reduced-frame ``r0``
+    directly because the input angles need not be in the reduced
+    coordinates that the interpolator expects.
+    """
+    snap = two_particle_snapshot_factory()
+    if snap.communicator.rank == 0:
+        r = rho_to_r(rho, r0, rc)
+
+        dx = r * numpy.sin(phi) * numpy.cos(theta)
+        dy = r * numpy.sin(phi) * numpy.sin(theta)
+        dz = r * numpy.cos(phi)
+
+        q_j = Rotation.from_euler("ZXZ", [alpha, beta, gamma]).as_quat(
+            scalar_first=True
+        )
+
+        snap.particles.position[0] = [0.0, 0.0, 0.0]
+        snap.particles.position[1] = [-dx, -dy, -dz]
+        snap.particles.orientation[0] = [1, 0, 0, 0]
+        snap.particles.orientation[1] = q_j
+        snap.particles.moment_inertia[:] = [0.1, 0.1, 0.1]
+
+    sim = simulation_factory(snap)
+
+    integrator = hoomd.md.Integrator(dt=0.001)
+    integrator.methods = [hoomd.md.methods.ConstantVolume(hoomd.filter.All())]
+
+    nlist = hoomd.md.nlist.Cell(buffer=1)
+    pot = pot_cls(
+        nlist=nlist,
+        domain=domain,
+        terms=terms,
+        coeffs=coeffs,
+        r0=r0_data,
+        r_cut=rc,
+    )
+
+    integrator.forces = [pot]
+    sim.operations.integrator = integrator
+    sim.run(0)
+    return sim, pot
+
+
+def check_pair(sim, pot, expected_energy, expected_force, expected_torque):
+    """Compare the C++ output on both particles."""
+    if sim.device.communicator.rank == 0:
+        half_e = 0.5 * expected_energy
+
+        numpy.testing.assert_allclose(pot.energies[0], half_e, atol=1e-3, rtol=1e-3)
+        numpy.testing.assert_allclose(
+            pot.forces[0], expected_force, atol=1e-3, rtol=1e-3
+        )
+        numpy.testing.assert_allclose(
+            pot.torques[0], expected_torque, atol=1e-3, rtol=1e-3
+        )
+
+        numpy.testing.assert_allclose(pot.energies[1], half_e, atol=1e-3, rtol=1e-3)
+        numpy.testing.assert_allclose(
+            pot.forces[1], -expected_force, atol=1e-3, rtol=1e-3
+        )
+        numpy.testing.assert_allclose(
+            pot.torques[1], -expected_torque, atol=1e-3, rtol=1e-3
+        )
+
+
+def check_zero_pair(sim, pot):
+    """Assert that both particles have zero force, torque, and energy."""
+    if sim.device.communicator.rank == 0:
+        numpy.testing.assert_allclose(pot.energies[0], 0.0, atol=1e-10)
+        numpy.testing.assert_allclose(pot.forces[0], [0.0, 0.0, 0.0], atol=1e-10)
+        numpy.testing.assert_allclose(pot.torques[0], [0.0, 0.0, 0.0], atol=1e-10)
+        numpy.testing.assert_allclose(pot.energies[1], 0.0, atol=1e-10)
+        numpy.testing.assert_allclose(pot.forces[1], [0.0, 0.0, 0.0], atol=1e-10)
+        numpy.testing.assert_allclose(pot.torques[1], [0.0, 0.0, 0.0], atol=1e-10)
+
 
 def test_chebyshev_construct_attach_zero(
     simulation_factory, two_particle_snapshot_factory
@@ -22,8 +148,7 @@ def test_chebyshev_construct_attach_zero(
     sim = simulation_factory(snap)
 
     integrator = hoomd.md.Integrator(dt=0.001)
-    nve = hoomd.md.methods.ConstantVolume(hoomd.filter.All())
-    integrator.methods = [nve]
+    integrator.methods = [hoomd.md.methods.ConstantVolume(hoomd.filter.All())]
 
     nlist = hoomd.md.nlist.Cell(buffer=0.4)
 
@@ -45,11 +170,10 @@ def test_chebyshev_construct_attach_zero(
         ],
         dtype=numpy.uint32,
     )
-
     coeffs = numpy.asarray([0.0, 0.0], dtype=numpy.float64)
-    # r0 must be 5D (and each dimension >= 2)
-    r0 = (numpy.arange(32, dtype=numpy.float64).reshape((2, 2, 2, 2, 2))) * 0.01
 
+    # r0 must be 5D (each dimension >= 2)
+    r0 = (numpy.arange(32, dtype=numpy.float64).reshape((2, 2, 2, 2, 2))) * 0.01
     r_cut = 3.0
 
     pot = hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential(
@@ -63,6 +187,7 @@ def test_chebyshev_construct_attach_zero(
 
     integrator.forces = [pot]
     sim.operations.integrator = integrator
+
     # attach
     sim.run(0)
     # check if attach happened
@@ -78,14 +203,60 @@ def test_chebyshev_construct_attach_zero(
         numpy.testing.assert_array_equal(pot.energies, numpy.zeros((2,)))
 
 
-def test_chebyshev_force_torque_energy_no_symmetry(
+def good_kwargs():
+    """A set of constructor kwargs known to be valid."""
+    return dict(
+        nlist=hoomd.md.nlist.Cell(buffer=0.4),
+        domain=numpy.zeros((5, 2), dtype=numpy.float64),
+        terms=numpy.zeros((1, 6), dtype=numpy.uint32),
+        coeffs=numpy.zeros((1,), dtype=numpy.float64),
+        r0=numpy.zeros((2, 2, 2, 2, 2), dtype=numpy.float64),
+        r_cut=3.0,
+    )
+
+
+def test_chebyshev_invalid_terms_shape():
+    """Raise ValueError when ``terms`` is not (Nterms, 6)."""
+    kwargs = good_kwargs()
+    kwargs["terms"] = numpy.zeros((1, 5), dtype=numpy.uint32)
+    kwargs["coeffs"] = numpy.zeros((1,), dtype=numpy.float64)
+    with pytest.raises(ValueError, match=r"terms must have shape \(Nterms, 6\)\."):
+        hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential(**kwargs)
+
+
+def test_chebyshev_invalid_coeffs_shape():
+    """Raise ValueError when ``coeffs`` length does not match Nterms."""
+    kwargs = good_kwargs()
+    kwargs["terms"] = numpy.zeros((2, 6), dtype=numpy.uint32)
+    kwargs["coeffs"] = numpy.zeros((1,), dtype=numpy.float64)
+    with pytest.raises(ValueError, match=r"coeffs must have shape \(Nterms,\)"):
+        hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential(**kwargs)
+
+
+def test_chebyshev_invalid_r0_ndim():
+    """Raise ValueError when ``r0`` is not a 5D array."""
+    kwargs = good_kwargs()
+    kwargs["r0"] = numpy.zeros((2, 2, 2, 2), dtype=numpy.float64)
+    with pytest.raises(ValueError, match=r"r0 must be a 5D array\."):
+        hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential(**kwargs)
+
+
+def test_chebyshev_invalid_r0_shape():
+    """Raise ValueError when ``r0`` has a dimension with less than 2 points."""
+    kwargs = good_kwargs()
+    kwargs["r0"] = numpy.zeros((2, 2, 1, 2, 2), dtype=numpy.float64)
+    with pytest.raises(
+        ValueError,
+        match=r"r0 must have at least 2 grid points along each of its 5 dimensions\.",
+    ):
+        hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential(**kwargs)
+
+
+# Null symmetry
+def test_chebyshev_force_torque_energy_null_symmetry(
     simulation_factory, two_particle_snapshot_factory
 ):
-    """Test energy, force, and torque, without considering symmetry."""
-    rc = 3.0
-    phi_min = 1e-5
-    beta_min = 1e-5
-
+    """Force, torque, and energy with no symmetry reduction."""
     domain = numpy.array(
         [
             [0.0, 2.0 * numpy.pi],
@@ -97,22 +268,7 @@ def test_chebyshev_force_torque_energy_no_symmetry(
         dtype=numpy.float64,
     )
 
-    terms = numpy.array(
-        [
-            [0, 0, 0, 0, 0, 0],
-            [0, 0, 0, 1, 0, 0],
-            [1, 0, 0, 0, 0, 0],
-            [1, 0, 0, 1, 0, 0],
-        ],
-        dtype=numpy.uint32,
-    )
-    coeffs = numpy.array([1.0, 0.25, 1.5, -1.0], dtype=numpy.float64)
-    # r0 data: shape (3, 2, 3, 2, 3) = 108 values.
-    r0_data = numpy.array([1, 2.1, 3.2] * 36, dtype=numpy.float64).reshape(
-        3, 2, 3, 2, 3
-    )
-
-    # r0 interpolator
+    # r0 interpolator aligned with r0_data's shape (3, 2, 3, 2, 3).
     theta_grid = numpy.linspace(0, 2 * numpy.pi, 3)
     phi_grid = numpy.linspace(phi_min, numpy.pi - phi_min, 2)
     alpha_grid = numpy.linspace(0, 2 * numpy.pi, 3)
@@ -127,80 +283,24 @@ def test_chebyshev_force_torque_energy_no_symmetry(
         fill_value=numpy.nan,
     )
 
-    def rho_to_r(rho, r0, rc):
-        """Invert  rho = (1/r - 1/r0) / (1/(r0+rc) - 1/r0)  to obtain r."""
-        inv_r0 = 1.0 / r0
-        inv_r0_rc = 1.0 / (r0 + rc)
-        inv_r = rho * (inv_r0_rc - inv_r0) + inv_r0
-        return 1.0 / inv_r
-
-    def run_pair(rho, theta, phi, alpha, beta, gamma):
-        """Build a two-particle simulation, run for one step, and return
-        the potential object."""
-        snap = two_particle_snapshot_factory()
-        if snap.communicator.rank == 0:
-            r0 = float(r0_interp(numpy.array([theta, phi, alpha, beta, gamma]))[0])
-            r = rho_to_r(rho, r0, rc)
-
-            dx = r * numpy.sin(phi) * numpy.cos(theta)
-            dy = r * numpy.sin(phi) * numpy.sin(theta)
-            dz = r * numpy.cos(phi)
-
-            rot = Rotation.from_euler("ZXZ", [alpha, beta, gamma])
-            q_j = rot.as_quat(scalar_first=True)
-
-            snap.particles.position[0] = [0.0, 0.0, 0.0]
-            snap.particles.position[1] = [-dx, -dy, -dz]
-            snap.particles.orientation[0] = [1, 0, 0, 0]
-            snap.particles.orientation[1] = q_j
-            snap.particles.moment_inertia[:] = [0.1, 0.1, 0.1]
-
-        sim = simulation_factory(snap)
-
-        integrator = hoomd.md.Integrator(dt=0.001)
-        nve = hoomd.md.methods.ConstantVolume(hoomd.filter.All())
-        integrator.methods = [nve]
-
-        nlist = hoomd.md.nlist.Cell(buffer=1)
-        pot = hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential(
-            nlist=nlist,
-            domain=domain,
-            terms=terms,
-            coeffs=coeffs,
-            r0=r0_data,
-            r_cut=rc,
+    def run(rho, theta, phi, alpha, beta, gamma):
+        r0 = float(r0_interp(numpy.array([theta, phi, alpha, beta, gamma]))[0])
+        return build_simulation(
+            simulation_factory,
+            two_particle_snapshot_factory,
+            hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential,
+            domain,
+            r0,
+            rho,
+            theta,
+            phi,
+            alpha,
+            beta,
+            gamma,
         )
 
-        integrator.forces = [pot]
-        sim.operations.integrator = integrator
-        sim.run(0)
-        return sim, pot
-
-    def check(sim, pot, expected_energy, expected_force, expected_torque):
-        """Compare both particles.  Particle 1 should obey Newton's third law."""
-        if sim.device.communicator.rank == 0:
-            half_e = 0.5 * expected_energy
-
-            # particle 0
-            numpy.testing.assert_allclose(pot.energies[0], half_e, atol=1e-3, rtol=1e-3)
-            numpy.testing.assert_allclose(
-                pot.forces[0], expected_force, atol=1e-3, rtol=1e-3
-            )
-            numpy.testing.assert_allclose(
-                pot.torques[0], expected_torque, atol=1e-3, rtol=1e-3
-            )
-
-            # particle 1 - Newton's third law
-            numpy.testing.assert_allclose(pot.energies[1], half_e, atol=1e-3, rtol=1e-3)
-            numpy.testing.assert_allclose(
-                pot.forces[1], -expected_force, atol=1e-3, rtol=1e-3
-            )
-            numpy.testing.assert_allclose(
-                pot.torques[1], -expected_torque, atol=1e-3, rtol=1e-3
-            )
-
     # point 1: interior
-    sim, pot = run_pair(
+    sim, pot = run(
         rho=0.2,
         theta=numpy.pi / 4,
         phi=numpy.pi / 4,
@@ -208,7 +308,7 @@ def check(sim, pot, expected_energy, expected_force, expected_torque):
         beta=numpy.pi / 2,
         gamma=numpy.pi,
     )
-    check(
+    check_pair(
         sim,
         pot,
         expected_energy=-0.41,
@@ -217,7 +317,7 @@ def check(sim, pot, expected_energy, expected_force, expected_torque):
     )
 
     # point 2: rho < 0 (clamped for derivatives, extrapolated for energy)
-    sim, pot = run_pair(
+    sim, pot = run(
         rho=-0.1,
         theta=numpy.pi / 4,
         phi=numpy.pi / 4,
@@ -225,7 +325,7 @@ def check(sim, pot, expected_energy, expected_force, expected_torque):
         beta=numpy.pi / 2,
         gamma=numpy.pi,
     )
-    check(
+    check_pair(
         sim,
         pot,
         expected_energy=-1.67,
@@ -234,7 +334,7 @@ def check(sim, pot, expected_energy, expected_force, expected_torque):
     )
 
     # point 3: phi at upper boundary
-    sim, pot = run_pair(
+    sim, pot = run(
         rho=0.0,
         theta=numpy.pi / 4,
         phi=numpy.pi - phi_min,
@@ -242,7 +342,7 @@ def check(sim, pot, expected_energy, expected_force, expected_torque):
         beta=numpy.pi / 2,
         gamma=numpy.pi,
     )
-    check(
+    check_pair(
         sim,
         pot,
         expected_energy=-1.583,
@@ -251,7 +351,7 @@ def check(sim, pot, expected_energy, expected_force, expected_torque):
     )
 
     # point 4: beta at lower boundary
-    sim, pot = run_pair(
+    sim, pot = run(
         rho=0.2,
         theta=numpy.pi / 4,
         phi=numpy.pi / 4,
@@ -259,7 +359,7 @@ def check(sim, pot, expected_energy, expected_force, expected_torque):
         beta=beta_min,
         gamma=numpy.pi,
     )
-    check(
+    check_pair(
         sim,
         pot,
         expected_energy=-0.41,
@@ -268,7 +368,7 @@ def check(sim, pot, expected_energy, expected_force, expected_torque):
     )
 
     # point 5: interior with rho near 1
-    sim, pot = run_pair(
+    sim, pot = run(
         rho=0.95,
         theta=numpy.pi / 4,
         phi=numpy.pi / 6,
@@ -276,7 +376,7 @@ def check(sim, pot, expected_energy, expected_force, expected_torque):
         beta=numpy.pi / 2,
         gamma=numpy.pi / 8,
     )
-    check(
+    check_pair(
         sim,
         pot,
         expected_energy=2.74,
@@ -285,7 +385,7 @@ def check(sim, pot, expected_energy, expected_force, expected_torque):
     )
 
     # point 6: rho > 1, beyond surface cutoff - all zeros
-    sim, pot = run_pair(
+    sim, pot = run(
         rho=1.05,
         theta=numpy.pi / 4,
         phi=numpy.pi / 6,
@@ -293,10 +393,352 @@ def check(sim, pot, expected_energy, expected_force, expected_torque):
         beta=numpy.pi / 2,
         gamma=numpy.pi / 8,
     )
-    if sim.device.communicator.rank == 0:
-        numpy.testing.assert_allclose(pot.energies[0], 0.0, atol=1e-10)
-        numpy.testing.assert_allclose(pot.forces[0], [0.0, 0.0, 0.0], atol=1e-10)
-        numpy.testing.assert_allclose(pot.torques[0], [0.0, 0.0, 0.0], atol=1e-10)
-        numpy.testing.assert_allclose(pot.energies[1], 0.0, atol=1e-10)
-        numpy.testing.assert_allclose(pot.forces[1], [0.0, 0.0, 0.0], atol=1e-10)
-        numpy.testing.assert_allclose(pot.torques[1], [0.0, 0.0, 0.0], atol=1e-10)
+    check_zero_pair(sim, pot)
+
+
+# Cube symmetry
+def test_chebyshev_force_torque_energy_cube_symmetry(
+    simulation_factory, two_particle_snapshot_factory
+):
+    """Force, torque, and energy with cube symmetry reduction.
+
+    Reduced domain: theta in [0, pi/4], phi in [0, pi/2],
+    alpha in [0, 2 pi], beta in [0, arccos(1/sqrt(3))],
+    gamma in [0, pi/2]."""
+    domain = numpy.array(
+        [
+            [0.0, numpy.pi / 4],
+            [phi_min, numpy.pi / 2],
+            [0.0, 2.0 * numpy.pi],
+            [beta_min, numpy.arccos(1 / numpy.sqrt(3))],
+            [0.0, numpy.pi / 2],
+        ],
+        dtype=numpy.float64,
+    )
+
+    def run(r0, rho, theta, phi, alpha, beta, gamma):
+        return build_simulation(
+            simulation_factory,
+            two_particle_snapshot_factory,
+            hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialCube,
+            domain,
+            r0,
+            rho,
+            theta,
+            phi,
+            alpha,
+            beta,
+            gamma,
+        )
+
+    # point 1: interior
+    sim, pot = run(
+        r0=2.46666667,
+        rho=0.2,
+        theta=numpy.pi / 8,
+        phi=numpy.pi / 5,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 6,
+        gamma=numpy.pi / 3,
+    )
+    check_pair(
+        sim,
+        pot,
+        expected_energy=-0.41,
+        expected_force=numpy.array([-1.335, -0.553, -1.989]),
+        expected_torque=numpy.array([7.395, -2.403, -0.271]),
+    )
+
+    # point 2: rho < 0 (clamped for derivatives, extrapolated for energy)
+    sim, pot = run(
+        r0=2.46666667,
+        rho=-0.1,
+        theta=numpy.pi / 8,
+        phi=numpy.pi / 5,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 6,
+        gamma=numpy.pi / 3,
+    )
+    check_pair(
+        sim,
+        pot,
+        expected_energy=-1.67,
+        expected_force=numpy.array([-1.875, -0.777, -2.793]),
+        expected_torque=numpy.array([9.579, -3.113, -0.398]),
+    )
+
+    # point 3: only theta is outside the reduced domain; phi, alpha,
+    # beta, and gamma are inside it
+    sim, pot = run(
+        r0=2.62072583,
+        rho=0.0,
+        theta=2 * numpy.pi / 7,
+        phi=numpy.pi / 9,
+        alpha=2 * numpy.pi / 15,
+        beta=2 * numpy.pi / 8,
+        gamma=numpy.pi / 5,
+    )
+    check_pair(
+        sim,
+        pot,
+        expected_energy=-0.751,
+        expected_force=numpy.array([-0.518, -0.65, -2.285]),
+        expected_torque=numpy.array([4.223, -0.398, 3.08]),
+    )
+
+    # point 4: theta, phi, and beta outside the reduced domain
+    sim, pot = run(
+        r0=2.11254315,
+        rho=0.0,
+        theta=2 * numpy.pi / 7,
+        phi=2 * numpy.pi / 3,
+        alpha=2 * numpy.pi / 15,
+        beta=numpy.pi / 3,
+        gamma=numpy.pi / 5,
+    )
+    check_pair(
+        sim,
+        pot,
+        expected_energy=-0.427,
+        expected_force=numpy.array([-1.256, -1.575, 1.163]),
+        expected_torque=numpy.array([4.872, -0.906, 0.398]),
+    )
+
+    # point 5: beta at lower boundary, gamma outside the domain
+    sim, pot = run(
+        r0=1.0,
+        rho=0.2,
+        theta=numpy.pi / 4,
+        phi=numpy.pi / 4,
+        alpha=2 * numpy.pi / 5,
+        beta=beta_min,
+        gamma=numpy.pi,
+    )
+    check_pair(
+        sim,
+        pot,
+        expected_energy=-0.41,
+        expected_force=numpy.array([-2.023, -2.023, -2.861]),
+        expected_torque=numpy.array([6.31798953e05, -2.05283924e05, -0.271]),
+    )
+
+    # point 6: all angles outside the domain (except alpha)
+    sim, pot = run(
+        r0=1.0,
+        rho=0.95,
+        theta=numpy.pi,
+        phi=2 * numpy.pi / 3,
+        alpha=2 * numpy.pi / 5,
+        beta=2 * numpy.pi / 3,
+        gamma=2 * numpy.pi,
+    )
+    check_pair(
+        sim,
+        pot,
+        expected_energy=2.61,
+        expected_force=numpy.array([0.363, 0.0, 0.209]),
+        expected_torque=numpy.array([-1.135, 0.369, -0.207]),
+    )
+
+    # point 7: equivalent to point 6 but already in the reduced domain
+    sim, pot = run(
+        r0=1.0,
+        rho=0.95,
+        theta=0.0,
+        phi=1.0471975511965979,
+        alpha=1.8849555921538759,
+        beta=0.5235987755982987,
+        gamma=0.0,
+    )
+    check_pair(
+        sim,
+        pot,
+        expected_energy=2.61,
+        expected_force=numpy.array([-0.363, -0.0, -0.209]),
+        expected_torque=numpy.array([1.135, 0.369, 0.207]),
+    )
+
+    # point 8: rho > 1, beyond surface cutoff - all zeros
+    sim, pot = run(
+        r0=1.0,
+        rho=1.05,
+        theta=numpy.pi / 4,
+        phi=numpy.pi / 6,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 2,
+        gamma=numpy.pi / 8,
+    )
+    check_zero_pair(sim, pot)
+
+
+# Tetrahedron symmetry
+def test_chebyshev_force_torque_energy_tetrahedron_symmetry(
+    simulation_factory, two_particle_snapshot_factory
+):
+    """Force, torque, and energy with tetrahedron symmetry reduction.
+
+    Reduced domain: theta in [0, 2 pi/3], phi in [0, pi],
+    alpha in [0, 2 pi], beta in [0, pi], gamma in [0, 2 pi/3]."""
+    domain = numpy.array(
+        [
+            [0.0, 2 * numpy.pi / 3],
+            [phi_min, numpy.pi],
+            [0.0, 2.0 * numpy.pi],
+            [beta_min, numpy.pi],
+            [0.0, 2 * numpy.pi / 3],
+        ],
+        dtype=numpy.float64,
+    )
+
+    def run(r0, rho, theta, phi, alpha, beta, gamma):
+        return build_simulation(
+            simulation_factory,
+            two_particle_snapshot_factory,
+            hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialTetrahedron,
+            domain,
+            r0,
+            rho,
+            theta,
+            phi,
+            alpha,
+            beta,
+            gamma,
+        )
+
+    # point 1: interior
+    sim, pot = run(
+        r0=2.1,
+        rho=0.2,
+        theta=numpy.pi / 8,
+        phi=numpy.pi / 5,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 6,
+        gamma=numpy.pi / 3,
+    )
+    check_pair(
+        sim,
+        pot,
+        expected_energy=-0.41,
+        expected_force=numpy.array([-1.437, -0.595, -2.142]),
+        expected_torque=numpy.array([6.111, -1.985, -0.271]),
+    )
+
+    # point 2: rho < 0 (clamped for derivatives, extrapolated for energy)
+    sim, pot = run(
+        r0=2.1,
+        rho=-0.1,
+        theta=numpy.pi / 8,
+        phi=numpy.pi / 5,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 6,
+        gamma=numpy.pi / 3,
+    )
+    check_pair(
+        sim,
+        pot,
+        expected_energy=-1.67,
+        expected_force=numpy.array([-2.07, -0.857, -3.084]),
+        expected_torque=numpy.array([8.013, -2.604, -0.398]),
+    )
+
+    # point 3: phi at the boundary and theta is outside the domain
+    sim, pot = run(
+        r0=1.66,
+        rho=0.0,
+        theta=3 * numpy.pi / 2,
+        phi=phi_min,
+        alpha=2 * numpy.pi / 15,
+        beta=numpy.pi / 4,
+        gamma=numpy.pi / 5,
+    )
+    check_pair(
+        sim,
+        pot,
+        expected_energy=-0.75,
+        expected_force=numpy.array([0.0, 0.0, -3.182]),
+        expected_torque=numpy.array([2.084, -4.680, -0.398]),
+    )
+
+    # point 4: beta at the bound and gamma out of bound
+    sim, pot = run(
+        r0=2.1,
+        rho=0.65,
+        theta=numpy.pi / 4,
+        phi=numpy.pi / 4,
+        alpha=2 * numpy.pi / 5,
+        beta=beta_min,
+        gamma=numpy.pi,
+    )
+    check_pair(
+        sim,
+        pot,
+        expected_energy=1.48,
+        expected_force=numpy.array([-0.649, -0.649, -0.917]),
+        expected_torque=numpy.array([1.54802229e05, -5.02982930e04, 0.016]),
+    )
+
+    # point 5: phi and beta at boundary, gamma outside the domain
+    sim, pot = run(
+        r0=2.1,
+        rho=0.2,
+        theta=numpy.pi / 4,
+        phi=numpy.pi - phi_min,
+        alpha=2 * numpy.pi / 5,
+        beta=beta_min,
+        gamma=numpy.pi,
+    )
+    check_pair(
+        sim,
+        pot,
+        expected_energy=-0.41,
+        expected_force=numpy.array([0.0, 0.0, 2.647]),
+        expected_torque=numpy.array([3.08981190e05, -1.00394074e05, -0.271]),
+    )
+
+    # point 6: all angles outside the domain (except alpha)
+    sim, pot = run(
+        r0=1.0,
+        rho=0.95,
+        theta=numpy.pi,
+        phi=2 * numpy.pi / 3,
+        alpha=2 * numpy.pi / 5,
+        beta=2 * numpy.pi / 3,
+        gamma=2 * numpy.pi,
+    )
+    check_pair(
+        sim,
+        pot,
+        expected_energy=1.873,
+        expected_force=numpy.array([0.146, 0.0, 0.084]),
+        expected_torque=numpy.array([0.371, -0.120, 0.207]),
+    )
+
+    # point 7: equivalent to point 6 but already in the reduced domain
+    sim, pot = run(
+        r0=1.0,
+        rho=0.95,
+        theta=1.0471975511965979,
+        phi=2.0943951023931953,
+        alpha=5.445427266222309,
+        beta=2.0943951023931953,
+        gamma=0.0,
+    )
+    check_pair(
+        sim,
+        pot,
+        expected_energy=1.873,
+        expected_force=numpy.array([-0.073, -0.127, 0.084]),
+        expected_torque=numpy.array([-0.29, -0.261, 0.207]),
+    )
+
+    # point 8: rho > 1, beyond surface cutoff - all zeros
+    sim, pot = run(
+        r0=1.0,
+        rho=1.05,
+        theta=numpy.pi / 4,
+        phi=numpy.pi / 6,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 2,
+        gamma=numpy.pi / 8,
+    )
+    check_zero_pair(sim, pot)

From a46606152b41d7b0affd720187335f25711841e3 Mon Sep 17 00:00:00 2001
From: Mohammadreza <mzf0069@auburn.edu>
Date: Fri, 24 Apr 2026 10:39:11 -0500
Subject: [PATCH 12/13] Remove domain as user-defined variable

---
 src/ChebyshevAnisotropicPairPotential.h | 20 ++++---
 src/ShapeSymmetry.h                     | 21 ++++----
 src/pair.py                             | 14 ++---
 src/pytest/test_chebyshev.py            | 71 ++++---------------------
 4 files changed, 37 insertions(+), 89 deletions(-)

diff --git a/src/ChebyshevAnisotropicPairPotential.h b/src/ChebyshevAnisotropicPairPotential.h
index 6a8b6b6..88edea6 100644
--- a/src/ChebyshevAnisotropicPairPotential.h
+++ b/src/ChebyshevAnisotropicPairPotential.h
@@ -91,7 +91,6 @@ class PYBIND11_EXPORT ChebyshevAnisotropicPairPotential : public ForceCompute
     public:
     ChebyshevAnisotropicPairPotential(std::shared_ptr<SystemDefinition> sysdef,
                                       std::shared_ptr<hoomd::md::NeighborList> nlist,
-                                      const Scalar* domain,
                                       const Scalar r_cut,
                                       const unsigned int* terms,
                                       const Scalar* coeffs,
@@ -156,7 +155,6 @@ template<class ShapeSymmetryT>
 ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::ChebyshevAnisotropicPairPotential(
     std::shared_ptr<SystemDefinition> sysdef,
     std::shared_ptr<hoomd::md::NeighborList> nlist,
-    const Scalar* domain,
     const Scalar r_cut,
     const unsigned int* terms,
     const Scalar* coeffs,
@@ -170,9 +168,17 @@ ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::ChebyshevAnisotropicPairPoten
         m_domain.swap(domain_arr);
 
         ArrayHandle<Scalar2> h_domain(m_domain, access_location::host, access_mode::readwrite);
+        const Scalar angle_lower[5] = {
+            Scalar(0.0),    // theta
+            Scalar(1.0e-5), // phi
+            Scalar(0.0),    // alpha
+            Scalar(1.0e-5), // beta
+            Scalar(0.0)     // gamma
+        };
+
         for (unsigned int d = 0; d < 5; ++d)
             {
-            h_domain.data[d] = make_scalar2(domain[2 * d], domain[2 * d + 1]);
+            h_domain.data[d] = make_scalar2(angle_lower[d], ShapeSymmetryT::domain_upper[d]);
             }
         }
 
@@ -652,18 +658,11 @@ void export_ChebyshevAnisotropicPairPotential(pybind11::module& m, const std::st
         .def(py::init(
             [](std::shared_ptr<SystemDefinition> sysdef,
                std::shared_ptr<NL> nlist,
-               py::array_t<Scalar, py::array::c_style | py::array::forcecast> domain,
                Scalar r_cut,
                py::array_t<unsigned int, py::array::c_style | py::array::forcecast> terms,
                py::array_t<Scalar, py::array::c_style | py::array::forcecast> coeffs,
                py::array_t<Scalar, py::array::c_style | py::array::forcecast> r0_data)
             {
-                // Domain must be (5,2) - rho is always in (0, 1)
-                if (domain.ndim() != 2 || domain.shape(0) != 5 || domain.shape(1) != 2)
-                    {
-                    throw std::runtime_error("domain must have shape (5,2).");
-                    }
-
                 // Terms must be (Nterms,6)
                 if (terms.ndim() != 2 || terms.shape(1) != 6)
                     {
@@ -698,7 +697,6 @@ void export_ChebyshevAnisotropicPairPotential(pybind11::module& m, const std::st
 
                 return std::make_shared<Pot>(sysdef,
                                              nlist,
-                                             domain.data(),
                                              r_cut,
                                              terms.data(),
                                              coeffs.data(),
diff --git a/src/ShapeSymmetry.h b/src/ShapeSymmetry.h
index 248daa1..cd5f37a 100644
--- a/src/ShapeSymmetry.h
+++ b/src/ShapeSymmetry.h
@@ -130,15 +130,18 @@ eulerFromQuat(const quat<Scalar>& q, Scalar& alpha, Scalar& beta, Scalar& gamma)
 
 //! Null symmetry: no reduction.
 /*! Full natural domain:
-    theta in [0, 2 pi], phi in [0, pi], alpha in [0, 2 pi],
-    beta in [0, pi], gamma in [0, 2 pi].
+    theta in [0, 2 pi], phi in [1e-5, pi-1e-5], alpha in [0, 2 pi],
+    beta in [1e-5, pi-1e-5], gamma in [0, 2 pi].
 */
 class ShapeSymmetryNull
     {
     public:
-    //! Upper bounds of the reduced domain (lower bounds are always zero).
-    static constexpr Scalar domain_upper[5]
-        = {Scalar(2.0 * M_PI), Scalar(M_PI), Scalar(2.0 * M_PI), Scalar(M_PI), Scalar(2.0 * M_PI)};
+    //! Upper bounds of the reduced domain.
+    static constexpr Scalar domain_upper[5] = {Scalar(2.0 * M_PI),
+                                               Scalar(M_PI) - Scalar(1e-5),
+                                               Scalar(2.0 * M_PI),
+                                               Scalar(M_PI) - Scalar(1e-5),
+                                               Scalar(2.0 * M_PI)};
 
 #ifndef __HIPCC__
     static std::string getName()
@@ -159,8 +162,8 @@ class ShapeSymmetryNull
 
 //! Cube symmetry evaluator.
 /*! Reduced domain:
-    theta in [0, pi/4], phi in [0, pi/2], alpha in [0, 2 pi],
-    beta in [0, arccos(1/sqrt(3))], gamma in [0, pi/2].
+    theta in [0, pi/4], phi in [1e-5, pi/2], alpha in [0, 2 pi],
+    beta in [1e-5, arccos(1/sqrt(3))], gamma in [0, pi/2].
 */
 class ShapeSymmetryCube
     {
@@ -285,9 +288,9 @@ class ShapeSymmetryTetrahedron
     public:
     //! Upper bounds of the reduced domain.
     static constexpr Scalar domain_upper[5] = {Scalar(2.0 * M_PI / 3.0),
-                                               Scalar(M_PI),
+                                               Scalar(M_PI) - Scalar(1e-5),
                                                Scalar(2.0 * M_PI),
-                                               Scalar(M_PI),
+                                               Scalar(M_PI - Scalar(1e-5)),
                                                Scalar(2.0 * M_PI / 3.0)};
 
 #ifndef __HIPCC__
diff --git a/src/pair.py b/src/pair.py
index bb86a30..bdb22f8 100644
--- a/src/pair.py
+++ b/src/pair.py
@@ -26,7 +26,7 @@ class ChebyshevAnisotropicPairPotential(Force):
     _ext_module = _azplugins
     _cpp_class_name = "ChebyshevAnisotropicPairPotentialNull"
 
-    def __init__(self, nlist, domain, terms, coeffs, r0, r_cut):
+    def __init__(self, nlist, terms, coeffs, r0, r_cut):
         super().__init__()
 
         self._nlist = nlist
@@ -35,14 +35,11 @@ def __init__(self, nlist, domain, terms, coeffs, r0, r_cut):
         param_dict["r_cut"] = float(r_cut)
         self._param_dict.update(param_dict)
 
-        self._domain = numpy.asarray(domain, dtype=numpy.float64)
         self._terms = numpy.asarray(terms, dtype=numpy.uint32)
         self._coeffs = numpy.asarray(coeffs, dtype=numpy.float64)
 
         self.r0 = numpy.asarray(r0, dtype=numpy.float64)
 
-        if self._domain.shape != (5, 2):
-            raise ValueError("domain must have shape (5, 2).")
         if self._terms.ndim != 2 or self._terms.shape[1] != 6:
             raise ValueError("terms must have shape (Nterms, 6).")
 
@@ -65,7 +62,6 @@ def _attach_hook(self):
         self._cpp_obj = cls(
             self._simulation.state._cpp_sys_def,
             self._nlist._cpp_obj,
-            self._domain,
             self.r_cut,
             self._terms,
             self._coeffs,
@@ -79,8 +75,8 @@ class ChebyshevAnisotropicPairPotentialCube(ChebyshevAnisotropicPairPotential):
     """Chebyshev anisotropic pair potential with cube symmetry reduction.
 
     Reduced domain:
-    theta in [0, pi/4], phi in [0, pi/2], alpha in [0, 2 pi],
-    beta in [0, arccos(1/sqrt(3))], gamma in [0, pi/2].
+    theta in [0, pi/4], phi in [1e-5, pi/2], alpha in [0, 2 pi],
+    beta in [1e-5, arccos(1/sqrt(3))], gamma in [0, pi/2].
     """
 
     _cpp_class_name = "ChebyshevAnisotropicPairPotentialCube"
@@ -90,8 +86,8 @@ class ChebyshevAnisotropicPairPotentialTetrahedron(ChebyshevAnisotropicPairPoten
     """Chebyshev anisotropic pair potential with tetrahedron symmetry reduction.
 
     Reduced domain:
-    theta in [0, 2 pi/3], phi in [0, pi], alpha in [0, 2 pi],
-    beta in [0, pi], gamma in [0, 2 pi/3].
+    theta in [0, 2 pi/3], phi in [1e-5, pi-1e-5], alpha in [0, 2 pi],
+    beta in [1e-5, pi-1e-5], gamma in [0, 2 pi/3].
     """
 
     _cpp_class_name = "ChebyshevAnisotropicPairPotentialTetrahedron"
diff --git a/src/pytest/test_chebyshev.py b/src/pytest/test_chebyshev.py
index ee459bf..14282f5 100644
--- a/src/pytest/test_chebyshev.py
+++ b/src/pytest/test_chebyshev.py
@@ -43,7 +43,6 @@ def build_simulation(
     simulation_factory,
     two_particle_snapshot_factory,
     pot_cls,
-    domain,
     r0,
     rho,
     theta,
@@ -89,7 +88,6 @@ def build_simulation(
     nlist = hoomd.md.nlist.Cell(buffer=1)
     pot = pot_cls(
         nlist=nlist,
-        domain=domain,
         terms=terms,
         coeffs=coeffs,
         r0=r0_data,
@@ -152,17 +150,6 @@ def test_chebyshev_construct_attach_zero(
 
     nlist = hoomd.md.nlist.Cell(buffer=0.4)
 
-    domain = numpy.asarray(
-        [
-            [0.0, 2.0 * numpy.pi],
-            [0.0, numpy.pi],
-            [0.0, 2.0 * numpy.pi],
-            [0.0, numpy.pi],
-            [0.0, 2.0 * numpy.pi],
-        ],
-        dtype=numpy.float64,
-    )
-
     terms = numpy.asarray(
         [
             [0, 0, 0, 0, 0, 0],
@@ -177,7 +164,7 @@ def test_chebyshev_construct_attach_zero(
     r_cut = 3.0
 
     pot = hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential(
-        nlist=nlist, domain=domain, terms=terms, coeffs=coeffs, r0=r0, r_cut=r_cut
+        nlist=nlist, terms=terms, coeffs=coeffs, r0=r0, r_cut=r_cut
     )
 
     assert numpy.isclose(pot.r_cut, r_cut)
@@ -207,7 +194,6 @@ def good_kwargs():
     """A set of constructor kwargs known to be valid."""
     return dict(
         nlist=hoomd.md.nlist.Cell(buffer=0.4),
-        domain=numpy.zeros((5, 2), dtype=numpy.float64),
         terms=numpy.zeros((1, 6), dtype=numpy.uint32),
         coeffs=numpy.zeros((1,), dtype=numpy.float64),
         r0=numpy.zeros((2, 2, 2, 2, 2), dtype=numpy.float64),
@@ -257,22 +243,11 @@ def test_chebyshev_force_torque_energy_null_symmetry(
     simulation_factory, two_particle_snapshot_factory
 ):
     """Force, torque, and energy with no symmetry reduction."""
-    domain = numpy.array(
-        [
-            [0.0, 2.0 * numpy.pi],
-            [phi_min, numpy.pi - phi_min],
-            [0.0, 2.0 * numpy.pi],
-            [beta_min, numpy.pi - phi_min],
-            [0.0, 2.0 * numpy.pi],
-        ],
-        dtype=numpy.float64,
-    )
-
     # r0 interpolator aligned with r0_data's shape (3, 2, 3, 2, 3).
     theta_grid = numpy.linspace(0, 2 * numpy.pi, 3)
     phi_grid = numpy.linspace(phi_min, numpy.pi - phi_min, 2)
     alpha_grid = numpy.linspace(0, 2 * numpy.pi, 3)
-    beta_grid = numpy.linspace(beta_min, numpy.pi - phi_min, 2)
+    beta_grid = numpy.linspace(beta_min, numpy.pi - beta_min, 2)
     gamma_grid = numpy.linspace(0, 2 * numpy.pi, 3)
 
     r0_interp = RegularGridInterpolator(
@@ -289,7 +264,6 @@ def run(rho, theta, phi, alpha, beta, gamma):
             simulation_factory,
             two_particle_snapshot_factory,
             hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential,
-            domain,
             r0,
             rho,
             theta,
@@ -402,26 +376,15 @@ def test_chebyshev_force_torque_energy_cube_symmetry(
 ):
     """Force, torque, and energy with cube symmetry reduction.
 
-    Reduced domain: theta in [0, pi/4], phi in [0, pi/2],
-    alpha in [0, 2 pi], beta in [0, arccos(1/sqrt(3))],
+    Reduced domain: theta in [0, pi/4], phi in [1e-5, pi/2],
+    alpha in [0, 2 pi], beta in [1e-5, arccos(1/sqrt(3))],
     gamma in [0, pi/2]."""
-    domain = numpy.array(
-        [
-            [0.0, numpy.pi / 4],
-            [phi_min, numpy.pi / 2],
-            [0.0, 2.0 * numpy.pi],
-            [beta_min, numpy.arccos(1 / numpy.sqrt(3))],
-            [0.0, numpy.pi / 2],
-        ],
-        dtype=numpy.float64,
-    )
 
     def run(r0, rho, theta, phi, alpha, beta, gamma):
         return build_simulation(
             simulation_factory,
             two_particle_snapshot_factory,
             hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialCube,
-            domain,
             r0,
             rho,
             theta,
@@ -467,15 +430,14 @@ def run(r0, rho, theta, phi, alpha, beta, gamma):
         expected_torque=numpy.array([9.579, -3.113, -0.398]),
     )
 
-    # point 3: phi at upper boundary (outside domain), theta and beta
-    # also outside the domain
+    # point 3: interior with rho=0
     sim, pot = run(
         r0=2.62072583,
         rho=0.0,
         theta=2 * numpy.pi / 7,
         phi=numpy.pi / 9,
         alpha=2 * numpy.pi / 15,
-        beta=2 * numpy.pi / 8,
+        beta=numpy.pi / 4,
         gamma=numpy.pi / 5,
     )
     check_pair(
@@ -486,7 +448,7 @@ def run(r0, rho, theta, phi, alpha, beta, gamma):
         expected_torque=numpy.array([4.223, -0.398, 3.08]),
     )
 
-    # point 4: theta out of bound
+    # point 4: theta, phi, and beta out of bound
     sim, pot = run(
         r0=2.11254315,
         rho=0.0,
@@ -577,25 +539,14 @@ def test_chebyshev_force_torque_energy_tetrahedron_symmetry(
 ):
     """Force, torque, and energy with tetrahedron symmetry reduction.
 
-    Reduced domain: theta in [0, 2 pi/3], phi in [0, pi],
-    alpha in [0, 2 pi], beta in [0, pi], gamma in [0, 2 pi/3]."""
-    domain = numpy.array(
-        [
-            [0.0, 2 * numpy.pi / 3],
-            [phi_min, numpy.pi],
-            [0.0, 2.0 * numpy.pi],
-            [beta_min, numpy.pi],
-            [0.0, 2 * numpy.pi / 3],
-        ],
-        dtype=numpy.float64,
-    )
+    Reduced domain: theta in [0, 2 pi/3], phi in [1e-5, pi],
+    alpha in [0, 2 pi], beta in [1e-5, pi], gamma in [0, 2 pi/3]."""
 
     def run(r0, rho, theta, phi, alpha, beta, gamma):
         return build_simulation(
             simulation_factory,
             two_particle_snapshot_factory,
             hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialTetrahedron,
-            domain,
             r0,
             rho,
             theta,
@@ -684,7 +635,7 @@ def run(r0, rho, theta, phi, alpha, beta, gamma):
         theta=numpy.pi / 4,
         phi=numpy.pi - phi_min,
         alpha=2 * numpy.pi / 5,
-        beta=beta_min,
+        beta=numpy.pi - beta_min,
         gamma=numpy.pi,
     )
     check_pair(
@@ -692,7 +643,7 @@ def run(r0, rho, theta, phi, alpha, beta, gamma):
         pot,
         expected_energy=-0.41,
         expected_force=numpy.array([0.0, 0.0, 2.647]),
-        expected_torque=numpy.array([3.08981190e05, -1.00394074e05, -0.271]),
+        expected_torque=numpy.array([2.57516972e05, -8.36723360e04, -0.271]),
     )
 
     # point 6: all angles outside the domain (except alpha)

From 0f7ec02666b93e8f8b613bd082b757ce338efcc5 Mon Sep 17 00:00:00 2001
From: Mohammadreza <mzf0069@auburn.edu>
Date: Sun, 17 May 2026 11:10:18 -0500
Subject: [PATCH 13/13] Implement suggestions and start GPU implementation

---
 src/CMakeLists.txt                            |    8 +
 src/ChebyshevAnisotropicPairPotential.h       |  308 ++---
 src/ChebyshevAnisotropicPairPotentialGPU.h    |  146 +++
 src/ShapeSymmetry.h                           |   18 +-
 ...hebyshevAnisotropicPairPotentialGPU.cc.inc |   39 +
 src/module.cc                                 |    6 +
 src/pair.py                                   |   13 +-
 src/pytest/test_chebyshev.py                  | 1113 ++++++++---------
 8 files changed, 919 insertions(+), 732 deletions(-)
 create mode 100644 src/ChebyshevAnisotropicPairPotentialGPU.h
 create mode 100644 src/export_ChebyshevAnisotropicPairPotentialGPU.cc.inc

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d266daa..c2c4499 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -173,6 +173,14 @@ foreach(_symmetry ${_chebyshev_symmetries})
                    @ONLY)
     set(_${COMPONENT_NAME}_sources ${_${COMPONENT_NAME}_sources}
         export_ChebyshevAnisotropicPairPotential${_symmetry}.cc)
+
+    if (ENABLE_HIP)
+        configure_file(export_ChebyshevAnisotropicPairPotentialGPU.cc.inc
+                       export_ChebyshevAnisotropicPairPotential${_symmetry}GPU.cc
+                       @ONLY)
+        set(_${COMPONENT_NAME}_sources ${_${COMPONENT_NAME}_sources}
+            export_ChebyshevAnisotropicPairPotential${_symmetry}GPU.cc)
+    endif()
 endforeach()
 
 # process velocity field geometries
diff --git a/src/ChebyshevAnisotropicPairPotential.h b/src/ChebyshevAnisotropicPairPotential.h
index 88edea6..fbf69eb 100644
--- a/src/ChebyshevAnisotropicPairPotential.h
+++ b/src/ChebyshevAnisotropicPairPotential.h
@@ -89,6 +89,9 @@ template<class ShapeSymmetryT>
 class PYBIND11_EXPORT ChebyshevAnisotropicPairPotential : public ForceCompute
     {
     public:
+    static constexpr unsigned int num_coordinates = 6;
+    static constexpr unsigned int num_angle_coordinates = num_coordinates - 1;
+
     ChebyshevAnisotropicPairPotential(std::shared_ptr<SystemDefinition> sysdef,
                                       std::shared_ptr<hoomd::md::NeighborList> nlist,
                                       const Scalar r_cut,
@@ -109,12 +112,6 @@ class PYBIND11_EXPORT ChebyshevAnisotropicPairPotential : public ForceCompute
         return m_nlist;
         }
 
-    /// 5x2 domain: stored as 5 entries of Scalar2 = (min,max)
-    const GPUArray<Scalar2>& getApproximationDomain() const
-        {
-        return m_domain;
-        }
-
     /// Read-only cutoff radius
     Scalar getRCut() const
         {
@@ -132,21 +129,29 @@ class PYBIND11_EXPORT ChebyshevAnisotropicPairPotential : public ForceCompute
 
     std::shared_ptr<hoomd::md::NeighborList> m_nlist; //!< Neighbor list
 
-    GPUArray<Scalar2> m_domain; //!< Approximation domain (5x2): 5 rows, each (min, max)
-
     Scalar m_r_cut;       //!< Cut-off distance in approximation domain
     Scalar m_nlist_r_cut; //!< Neighbor-list cutoff = ceil(max(r0) + r_cut)
 
     std::shared_ptr<GPUArray<Scalar>> m_r_cut_nlist; //!< r_cut matrix shared with nlist
     bool m_attached = true;                          //!< Whether attached to the simulation
 
-    GPUArray<unsigned int> m_terms; //!< Chebyshev term list (Nterms x 6)
+    GPUArray<unsigned int> m_terms; //!< Chebyshev term list (Nterms x num_coordinates)
     GPUArray<Scalar> m_coeffs;      //!< Coefficients corresponding to each term
     unsigned int m_Nterms;          //!< Number of terms
 
     GPUArray<Scalar> m_r0_data;        //!< R0 data
     GPUArray<unsigned int> m_r0_shape; //!< Points per dimension to sample r0
 
+    std::array<unsigned int, num_coordinates>
+        m_max_deg; //!< Maximum Chebyshev degree per coordinate
+    std::array<Scalar, num_coordinates>
+        m_cheb_scale;                   //!< Chain-rule scale factors for each coordinate
+    unsigned int m_max_deg_global;      //!< Maximum Chebyshev degree over all coordinates
+    Index2D m_cheb_idx;                 //!< Indexer for flat Chebyshev scratch storage
+    std::vector<Scalar> m_cheb_T_flat;  //!< Chebyshev polynomial scratch storage
+    std::vector<Scalar> m_cheb_dT_flat; //!< Chebyshev derivative scratch storage
+
+    void initializeChebyshevData();
     void computeForces(uint64_t timestep) override;
     };
 
@@ -161,34 +166,17 @@ ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::ChebyshevAnisotropicPairPoten
     unsigned int Nterms,
     const Scalar* r0_data,
     const unsigned int* r0_shape)
-    : ForceCompute(sysdef), m_nlist(nlist), m_r_cut(r_cut), m_Nterms(Nterms)
+    : ForceCompute(sysdef), m_nlist(nlist), m_r_cut(r_cut), m_Nterms(Nterms), m_max_deg_global(0),
+      m_cheb_idx()
     {
+        // terms: shape (Nterms, num_coordinates), stored flat
         {
-        GPUArray<Scalar2> domain_arr(5, m_exec_conf);
-        m_domain.swap(domain_arr);
-
-        ArrayHandle<Scalar2> h_domain(m_domain, access_location::host, access_mode::readwrite);
-        const Scalar angle_lower[5] = {
-            Scalar(0.0),    // theta
-            Scalar(1.0e-5), // phi
-            Scalar(0.0),    // alpha
-            Scalar(1.0e-5), // beta
-            Scalar(0.0)     // gamma
-        };
-
-        for (unsigned int d = 0; d < 5; ++d)
-            {
-            h_domain.data[d] = make_scalar2(angle_lower[d], ShapeSymmetryT::domain_upper[d]);
-            }
-        }
-
-        // terms: shape (Nterms, 6), stored flat
-        {
-        GPUArray<unsigned int> terms_arr(static_cast<size_t>(Nterms) * 6, m_exec_conf);
+        GPUArray<unsigned int> terms_arr(static_cast<size_t>(Nterms) * num_coordinates,
+                                         m_exec_conf);
         m_terms.swap(terms_arr);
 
-        ArrayHandle<unsigned int> h_terms(m_terms, access_location::host, access_mode::readwrite);
-        std::copy(terms, terms + static_cast<size_t>(Nterms) * 6, h_terms.data);
+        ArrayHandle<unsigned int> h_terms(m_terms, access_location::host, access_mode::overwrite);
+        std::copy(terms, terms + static_cast<size_t>(Nterms) * num_coordinates, h_terms.data);
         }
 
         // coeffs: shape (Nterms,)
@@ -196,24 +184,24 @@ ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::ChebyshevAnisotropicPairPoten
         GPUArray<Scalar> coeffs_arr(Nterms, m_exec_conf);
         m_coeffs.swap(coeffs_arr);
 
-        ArrayHandle<Scalar> h_coeffs(m_coeffs, access_location::host, access_mode::readwrite);
+        ArrayHandle<Scalar> h_coeffs(m_coeffs, access_location::host, access_mode::overwrite);
         std::copy(coeffs, coeffs + Nterms, h_coeffs.data);
         }
 
-        // r0_shape: length 5
+        // r0_shape: length num_angle_coordinates
         {
-        GPUArray<unsigned int> shape_arr(5, m_exec_conf);
+        GPUArray<unsigned int> shape_arr(num_angle_coordinates, m_exec_conf);
         m_r0_shape.swap(shape_arr);
 
         ArrayHandle<unsigned int> h_shape(m_r0_shape,
                                           access_location::host,
-                                          access_mode::readwrite);
-        std::copy(r0_shape, r0_shape + 5, h_shape.data);
+                                          access_mode::overwrite);
+        std::copy(r0_shape, r0_shape + num_angle_coordinates, h_shape.data);
         }
 
     // r0_data: flat array, length = product(r0_shape)
     unsigned int n_r0 = 1;
-    for (unsigned int d = 0; d < 5; ++d)
+    for (unsigned int d = 0; d < num_angle_coordinates; ++d)
         {
         n_r0 *= r0_shape[d];
         }
@@ -222,7 +210,7 @@ ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::ChebyshevAnisotropicPairPoten
         GPUArray<Scalar> r0_arr(n_r0, m_exec_conf);
         m_r0_data.swap(r0_arr);
 
-        ArrayHandle<Scalar> h_r0(m_r0_data, access_location::host, access_mode::readwrite);
+        ArrayHandle<Scalar> h_r0(m_r0_data, access_location::host, access_mode::overwrite);
         std::copy(r0_data, r0_data + n_r0, h_r0.data);
         }
 
@@ -230,15 +218,20 @@ ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::ChebyshevAnisotropicPairPoten
     Scalar max_r0 = *std::max_element(r0_data, r0_data + n_r0);
     m_nlist_r_cut = std::ceil(max_r0 + m_r_cut);
 
-    m_r_cut_nlist = std::make_shared<GPUArray<Scalar>>(1, m_exec_conf);
+    const Index2D typpair_idx(m_pdata->getNTypes());
+    m_r_cut_nlist = std::make_shared<GPUArray<Scalar>>(typpair_idx.getNumElements(), m_exec_conf);
         {
         ArrayHandle<Scalar> h_r_cut_nlist(*m_r_cut_nlist,
                                           access_location::host,
                                           access_mode::overwrite);
-        h_r_cut_nlist.data[0] = m_nlist_r_cut;
+        std::fill(h_r_cut_nlist.data,
+                  h_r_cut_nlist.data + typpair_idx.getNumElements(),
+                  m_nlist_r_cut);
         }
     m_nlist->addRCutMatrix(m_r_cut_nlist);
     m_nlist->notifyRCutMatrixChange();
+
+    initializeChebyshevData();
     }
 
 // Destructor
@@ -262,6 +255,46 @@ void ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::notifyDetach()
     m_attached = false;
     }
 
+// initializeChebyshevData: cache maximum degrees, chain-rule scales, and scratch storage
+template<class ShapeSymmetryT>
+void ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::initializeChebyshevData()
+    {
+    m_max_deg.fill(0);
+    // Track the largest degree per coordinate over the flat (Nterms x num_coordinates) terms
+    ArrayHandle<unsigned int> h_terms(m_terms, access_location::host, access_mode::read);
+    for (unsigned int t = 0; t < m_Nterms; ++t)
+        {
+        for (unsigned int c = 0; c < num_coordinates; ++c)
+            {
+            const unsigned int deg = h_terms.data[t * num_coordinates + c];
+            if (deg > m_max_deg[c])
+                {
+                m_max_deg[c] = deg;
+                }
+            }
+        }
+    // Chain-rule scale factors d(x_scaled)/d(x) = 2 / (upper - lower) for each coordinate
+    m_cheb_scale[0] = Scalar(2); // non-angle coordinate (presumably rho on (0, 1) -- confirm)
+    for (unsigned int d = 0; d < num_angle_coordinates; ++d)
+        {
+        m_cheb_scale[d + 1]
+            = Scalar(2) / (ShapeSymmetryT::domain_upper[d] - ShapeSymmetryT::domain_lower[d]);
+        }
+    // Reduce to the largest degree over all coordinates (used to size the scratch indexer)
+    m_max_deg_global = 0;
+    for (unsigned int c = 0; c < num_coordinates; ++c)
+        {
+        if (m_max_deg[c] > m_max_deg_global)
+            {
+            m_max_deg_global = m_max_deg[c];
+            }
+        }
+    // Size both scratch buffers from one 2D indexer: (m_max_deg_global + 1) by num_coordinates
+    m_cheb_idx = Index2D(m_max_deg_global + 1, num_coordinates);
+    m_cheb_T_flat.resize(m_cheb_idx.getNumElements());
+    m_cheb_dT_flat.resize(m_cheb_idx.getNumElements());
+    }
+
 // computeForces
 template<class ShapeSymmetryT>
 void ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::computeForces(uint64_t timestep)
@@ -270,7 +303,8 @@ void ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::computeForces(uint64_t t
     m_nlist->compute(timestep);
 
     // check neighbor list storage mode
-    const bool third_law = (m_nlist->getStorageMode() == hoomd::md::NeighborList::half);
+    const bool use_third_law = (m_nlist->getStorageMode() == hoomd::md::NeighborList::half);
+    const unsigned int N_local = m_pdata->getN();
     // access neighbor list, particle data, and simulation box
     ArrayHandle<unsigned int> h_n_neigh(m_nlist->getNNeighArray(),
                                         access_location::host,
@@ -285,7 +319,7 @@ void ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::computeForces(uint64_t t
     ArrayHandle<Scalar4> h_orientation(m_pdata->getOrientationArray(),
                                        access_location::host,
                                        access_mode::read);
-    ArrayHandle<Scalar2> h_domain(m_domain, access_location::host, access_mode::read);
+    ArrayHandle<unsigned int> h_tag(m_pdata->getTags(), access_location::host, access_mode::read);
     ArrayHandle<Scalar> h_r0_data(m_r0_data, access_location::host, access_mode::read);
     ArrayHandle<unsigned int> h_r0_shape(m_r0_shape, access_location::host, access_mode::read);
     ArrayHandle<unsigned int> h_terms(m_terms, access_location::host, access_mode::read);
@@ -295,39 +329,10 @@ void ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::computeForces(uint64_t t
     const Scalar nlist_rcutsq = m_nlist_r_cut * m_nlist_r_cut;
     const Scalar fd_step = Scalar(1.0e-6);
 
-    LinearInterpolator5D<Scalar> interp(h_r0_data.data, h_r0_shape.data, h_domain.data);
-
-    // Determine the maximum Chebyshev degree needed for each of the 6 coordinates
-    unsigned int max_deg[6] = {0, 0, 0, 0, 0, 0};
-    for (unsigned int t = 0; t < m_Nterms; ++t)
-        {
-        for (unsigned int c = 0; c < 6; ++c)
-            {
-            const unsigned int deg = h_terms.data[t * 6 + c];
-            if (deg > max_deg[c])
-                max_deg[c] = deg;
-            }
-        }
-
-    // Chain-rule scale factors: d(x_scaled)/d(x) = 2 / (hi - lo)
-    Scalar cheb_scale[6];
-    cheb_scale[0] = Scalar(2);
-    for (unsigned int d = 0; d < 5; ++d)
-        {
-        cheb_scale[d + 1] = Scalar(2) / (h_domain.data[d].y - h_domain.data[d].x);
-        }
-
-    // Flat 1D Chebyshev storage
-    unsigned int max_deg_global = 0;
-    for (unsigned int c = 0; c < 6; ++c)
-        {
-        if (max_deg[c] > max_deg_global)
-            max_deg_global = max_deg[c];
-        }
-
-    const Index2D cheb_idx(max_deg_global + 1, 6);
-    std::vector<Scalar> cheb_T_flat(cheb_idx.getNumElements());
-    std::vector<Scalar> cheb_dT_flat(cheb_idx.getNumElements());
+    LinearInterpolator5D<Scalar> interp(h_r0_data.data,
+                                        h_r0_shape.data,
+                                        ShapeSymmetryT::domain_lower,
+                                        ShapeSymmetryT::domain_upper);
 
     m_force.zeroFill();
     m_torque.zeroFill();
@@ -340,15 +345,15 @@ void ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::computeForces(uint64_t t
     //! Euler-angle singularity tolerance for the alpha/gamma extraction.
     const Scalar euler_singularity_tol = Scalar(1e-7);
 
-    //! beta threshold for the Jacobian (avoids 1/sin(beta) singulrity).
-    const Scalar beta_tol = Scalar(1e-5);
+    const Scalar phi_eval_min = ShapeSymmetryT::domain_lower[1];
+    const Scalar phi_eval_max = ShapeSymmetryT::domain_upper[1];
+    const Scalar beta_eval_min = ShapeSymmetryT::domain_lower[3];
+    const Scalar beta_eval_max = ShapeSymmetryT::domain_upper[3];
 
     for (unsigned int i = 0; i < N; ++i)
         {
-        // Particle i position and orientation
-        const Scalar3 pos_i = make_scalar3(h_pos.data[i].x, h_pos.data[i].y, h_pos.data[i].z);
-        const quat<Scalar> q_i(h_orientation.data[i]);
-        const quat<Scalar> q_i_conj = conj(q_i);
+        // Per-pair position and orientation are loaded inside the loop after
+        // sorting by tag (on full lists)
 
         // Initialize particle force, torque, and energy
         Scalar3 fi = make_scalar3(0, 0, 0);
@@ -364,8 +369,26 @@ void ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::computeForces(uint64_t t
             const unsigned int j = h_nlist.data[myHead + k];
             assert(j < m_pdata->getN() + m_pdata->getNGhosts());
 
-            const Scalar3 pos_j = make_scalar3(h_pos.data[j].x, h_pos.data[j].y, h_pos.data[j].z);
-            Scalar3 dx = pos_i - pos_j;
+            // Sort the pair by tag
+            unsigned int eval_a = i;
+            unsigned int eval_b = j;
+            bool i_is_eval_a = true;
+                {
+                const unsigned int tag_i = h_tag.data[i];
+                const unsigned int tag_j = h_tag.data[j];
+                if (tag_j < tag_i)
+                    {
+                    eval_a = j;
+                    eval_b = i;
+                    i_is_eval_a = false;
+                    }
+                }
+
+            const Scalar3 pos_a
+                = make_scalar3(h_pos.data[eval_a].x, h_pos.data[eval_a].y, h_pos.data[eval_a].z);
+            const Scalar3 pos_b
+                = make_scalar3(h_pos.data[eval_b].x, h_pos.data[eval_b].y, h_pos.data[eval_b].z);
+            Scalar3 dx = pos_a - pos_b;
             // Apply periodic boundary conditions
             dx = box.minImage(dx);
             // Neighbor-list cutoff check (center-center distance).
@@ -375,19 +398,19 @@ void ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::computeForces(uint64_t t
                 continue;
                 }
 
-            // particle j, orientation quaternion
-            const quat<Scalar> q_j(h_orientation.data[j]);
-            // dx is in lab frame, so rotate dx by conj(q_i)
-            const vec3<Scalar> dx_body = rotate(q_i_conj, vec3<Scalar>(dx));
-
-            // Relative orientation of j with respect to i:
-            //     q_rel = conj(q_i) * q_j
+            const quat<Scalar> q_a(h_orientation.data[eval_a]);
+            const quat<Scalar> q_b(h_orientation.data[eval_b]);
+            const quat<Scalar> q_a_conj = conj(q_a);
+            // dx is in lab frame, so rotate dx by conj(q_a)
+            const vec3<Scalar> dx_body = rotate(q_a_conj, vec3<Scalar>(dx));
+            // Relative orientation of eval_b with respect to eval_a:
+            //     q_rel = conj(q_a) * q_b
             // ref:
             // https://www.mathworks.com/help/fusion/ug/rotations-orientation-and-quaternions.html
-            const quat<Scalar> q_rel = q_i_conj * q_j;
+            const quat<Scalar> q_rel = q_a_conj * q_b;
 
             // Convert position to spherical coordinates
-            // Skip overlapping particles.
+            // Skip overlapping particles
             const Scalar r = fast::sqrt(dot(dx_body, dx_body));
             if (r < Scalar(1e-12))
                 {
@@ -448,19 +471,19 @@ void ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::computeForces(uint64_t t
 
             // Move phi and beta away from 0 and pi to avoid 1/sin(beta or phi)
             // singularity in the Jacobian (used the same threshold as beta).
-            if (beta < beta_tol)
-                beta = beta_tol;
-            else if (beta > Scalar(M_PI) - beta_tol)
-                beta = Scalar(M_PI) - beta_tol;
+            if (beta < beta_eval_min)
+                beta = beta_eval_min;
+            else if (beta > beta_eval_max)
+                beta = beta_eval_max;
 
-            if (phi < beta_tol)
-                phi = beta_tol;
-            else if (phi > Scalar(M_PI) - beta_tol)
-                phi = Scalar(M_PI) - beta_tol;
+            if (phi < phi_eval_min)
+                phi = phi_eval_min;
+            else if (phi > phi_eval_max)
+                phi = phi_eval_max;
 
             // Compute r0 and all 5 derivatives
             Scalar r0;
-            Scalar dr0[5];
+            Scalar dr0[num_angle_coordinates];
             interp.valueAndDerivatives(theta, phi, alpha, beta, gamma, fd_step, r0, dr0);
             const Scalar dr0_dtheta = dr0[0];
             const Scalar dr0_dphi = dr0[1];
@@ -497,50 +520,53 @@ void ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::computeForces(uint64_t t
             // Chebyshev evaluation: scale each coordinate to [-1,1]
             // and evaluate polynomials + derivatives up to max degree.
             chebEvaluate(chebScale(rho, Scalar(0), Scalar(1)),
-                         max_deg[0],
-                         cheb_T_flat.data() + cheb_idx(0, 0),
-                         cheb_dT_flat.data() + cheb_idx(0, 0));
+                         m_max_deg[0],
+                         m_cheb_T_flat.data() + m_cheb_idx(0, 0),
+                         m_cheb_dT_flat.data() + m_cheb_idx(0, 0));
 
-            const Scalar ang_coords[5] = {theta, phi, alpha, beta, gamma};
+            const Scalar ang_coords[num_angle_coordinates] = {theta, phi, alpha, beta, gamma};
             for (unsigned int c = 0; c < 5; ++c)
                 {
-                chebEvaluate(chebScale(ang_coords[c], h_domain.data[c].x, h_domain.data[c].y),
-                             max_deg[c + 1],
-                             cheb_T_flat.data() + cheb_idx(0, c + 1),
-                             cheb_dT_flat.data() + cheb_idx(0, c + 1));
+                chebEvaluate(chebScale(ang_coords[c],
+                                       ShapeSymmetryT::domain_lower[c],
+                                       ShapeSymmetryT::domain_upper[c]),
+                             m_max_deg[c + 1],
+                             m_cheb_T_flat.data() + m_cheb_idx(0, c + 1),
+                             m_cheb_dT_flat.data() + m_cheb_idx(0, c + 1));
                 }
 
             // Evaluate u and du/d(coord_k)
             Scalar u = Scalar(0);
-            Scalar du[6] = {Scalar(0), Scalar(0), Scalar(0), Scalar(0), Scalar(0), Scalar(0)};
+            Scalar du[num_coordinates]
+                = {Scalar(0), Scalar(0), Scalar(0), Scalar(0), Scalar(0), Scalar(0)};
 
             for (unsigned int t = 0; t < m_Nterms; ++t)
                 {
-                const unsigned int* degs = h_terms.data + 6 * t;
+                const unsigned int* degs = h_terms.data + num_coordinates * t;
                 const Scalar coeff = h_coeffs.data[t];
 
-                Scalar T_vals[6];
-                Scalar dT_vals[6];
-                for (unsigned int c = 0; c < 6; ++c)
+                Scalar T_vals[num_coordinates];
+                Scalar dT_vals[num_coordinates];
+                for (unsigned int c = 0; c < num_coordinates; ++c)
                     {
-                    T_vals[c] = cheb_T_flat[cheb_idx(degs[c], c)];
-                    dT_vals[c] = cheb_dT_flat[cheb_idx(degs[c], c)];
+                    T_vals[c] = m_cheb_T_flat[m_cheb_idx(degs[c], c)];
+                    dT_vals[c] = m_cheb_dT_flat[m_cheb_idx(degs[c], c)];
                     }
 
-                Scalar prefix[7];
+                Scalar prefix[num_coordinates + 1];
                 prefix[0] = Scalar(1);
-                for (unsigned int c = 0; c < 6; ++c)
+                for (unsigned int c = 0; c < num_coordinates; ++c)
                     prefix[c + 1] = prefix[c] * T_vals[c];
 
-                Scalar suffix[7];
-                suffix[6] = Scalar(1);
-                for (int c = 5; c >= 0; --c)
+                Scalar suffix[num_coordinates + 1];
+                suffix[num_coordinates] = Scalar(1);
+                for (int c = static_cast<int>(num_coordinates) - 1; c >= 0; --c)
                     suffix[c] = suffix[c + 1] * T_vals[c];
 
-                u += coeff * prefix[6];
+                u += coeff * prefix[num_coordinates];
 
-                for (unsigned int c = 0; c < 6; ++c)
-                    du[c] += coeff * dT_vals[c] * cheb_scale[c] * prefix[c] * suffix[c + 1];
+                for (unsigned int c = 0; c < num_coordinates; ++c)
+                    du[c] += coeff * dT_vals[c] * m_cheb_scale[c] * prefix[c] * suffix[c + 1];
                 }
 
             // Linear extrapolation for energy when rho < 0
@@ -600,28 +626,30 @@ void ChebyshevAnisotropicPairPotential<ShapeSymmetryT>::computeForces(uint64_t t
             const Scalar tau_y = tau_lab.y;
             const Scalar tau_z = tau_lab.z;
 
-            // Accumulate for particle i
-            fi.x += f_x;
-            fi.y += f_y;
-            fi.z += f_z;
+            // Writeback
+            const Scalar sign = i_is_eval_a ? Scalar(1) : Scalar(-1);
+
+            fi.x += sign * f_x;
+            fi.y += sign * f_y;
+            fi.z += sign * f_z;
 
-            ti.x += tau_x;
-            ti.y += tau_y;
-            ti.z += tau_z;
+            ti.x += sign * tau_x;
+            ti.y += sign * tau_y;
+            ti.z += sign * tau_z;
 
             pei += u_energy;
 
             // Newton's third law for half neighbor list
-            if (third_law)
+            if (use_third_law && j < N_local)
                 {
-                h_force.data[j].x -= f_x;
-                h_force.data[j].y -= f_y;
-                h_force.data[j].z -= f_z;
+                h_force.data[j].x -= sign * f_x;
+                h_force.data[j].y -= sign * f_y;
+                h_force.data[j].z -= sign * f_z;
                 h_force.data[j].w += Scalar(0.5) * u_energy;
 
-                h_torque.data[j].x -= tau_x;
-                h_torque.data[j].y -= tau_y;
-                h_torque.data[j].z -= tau_z;
+                h_torque.data[j].x -= sign * tau_x;
+                h_torque.data[j].y -= sign * tau_y;
+                h_torque.data[j].z -= sign * tau_z;
                 }
             }
 
diff --git a/src/ChebyshevAnisotropicPairPotentialGPU.h b/src/ChebyshevAnisotropicPairPotentialGPU.h
new file mode 100644
index 0000000..17b9444
--- /dev/null
+++ b/src/ChebyshevAnisotropicPairPotentialGPU.h
@@ -0,0 +1,146 @@
+// Copyright (c) 2018-2020, Michael P. Howard
+// Copyright (c) 2021-2025, Auburn University
+// Part of azplugins, released under the BSD 3-Clause License.
+
+#ifndef AZPLUGINS_CHEBYSHEV_ANISOTROPIC_PAIR_POTENTIAL_GPU_H_
+#define AZPLUGINS_CHEBYSHEV_ANISOTROPIC_PAIR_POTENTIAL_GPU_H_
+
+#include <memory>
+#ifdef ENABLE_HIP
+#include "ChebyshevAnisotropicPairPotential.h"
+
+/*!\file ChebyshevAnisotropicPairPotentialGPU.h
+   \brief Defines a GPU shell for the Chebyshev anisotropic pair potential.
+   \note This header cannot be compiled by nvcc
+*/
+
+#ifdef __HIPCC__
+#error This header cannot be compiled by nvcc
+#endif
+
+#include <pybind11/pybind11.h>
+
+namespace hoomd
+    {
+namespace azplugins
+    {
+
+//! GPU shell for the Chebyshev anisotropic pair potential, templated on a symmetry reducer.
+/*! \tparam ShapeSymmetryT A class providing static \c domain_lower[5] and \c domain_upper[5]
+ *         arrays and a static \c reduce(theta, phi, alpha, beta, gamma) method that maps
+ *         the angles into a fundamental domain and returns the applied rotation as a
+ *         quaternion.  See \c ShapeSymmetry.h.
+ */
+template<class ShapeSymmetryT>
+class PYBIND11_EXPORT ChebyshevAnisotropicPairPotentialGPU
+    : public ChebyshevAnisotropicPairPotential<ShapeSymmetryT>
+    {
+    public:
+    using Base = ChebyshevAnisotropicPairPotential<ShapeSymmetryT>;
+
+    ChebyshevAnisotropicPairPotentialGPU(std::shared_ptr<SystemDefinition> sysdef,
+                                         std::shared_ptr<hoomd::md::NeighborList> nlist,
+                                         const Scalar r_cut,
+                                         const unsigned int* terms,
+                                         const Scalar* coeffs,
+                                         unsigned int Nterms,
+                                         const Scalar* r0_data,
+                                         const unsigned int* r0_shape);
+
+    virtual ~ChebyshevAnisotropicPairPotentialGPU() { }
+    };
+
+//! Forward to the base constructor; throw std::runtime_error if CUDA is not enabled.
+template<class ShapeSymmetryT>
+ChebyshevAnisotropicPairPotentialGPU<ShapeSymmetryT>::ChebyshevAnisotropicPairPotentialGPU(
+    std::shared_ptr<SystemDefinition> sysdef,
+    std::shared_ptr<hoomd::md::NeighborList> nlist,
+    const Scalar r_cut,
+    const unsigned int* terms,
+    const Scalar* coeffs,
+    unsigned int Nterms,
+    const Scalar* r0_data,
+    const unsigned int* r0_shape)
+    : Base(sysdef, nlist, r_cut, terms, coeffs, Nterms, r0_data, r0_shape)
+    {
+    if (!this->m_exec_conf->isCUDAEnabled())
+        {
+        this->m_exec_conf->msg->error()
+            << "Creating a ChebyshevAnisotropicPairPotentialGPU with no GPU in the "
+            << "execution configuration" << std::endl;
+        throw std::runtime_error("Error initializing ChebyshevAnisotropicPairPotentialGPU");
+        }
+    }
+
+namespace detail
+    {
+
+//! Export one GPU subclass of ChebyshevAnisotropicPairPotential to python.
+/*! The bound constructor checks the shapes of terms, coeffs and r0_data and throws
+ *  std::runtime_error on a mismatch before the potential is constructed.
+ * \param m    pybind11 module.
+ * \param name Name of the class in the python module (must be unique per symmetry).
+ * \tparam ShapeSymmetryT Symmetry evaluator type.
+ */
+template<class ShapeSymmetryT>
+void export_ChebyshevAnisotropicPairPotentialGPU(pybind11::module& m, const std::string& name)
+    {
+    namespace py = pybind11;
+    using NL = hoomd::md::NeighborList;
+    using Pot = ChebyshevAnisotropicPairPotentialGPU<ShapeSymmetryT>;
+    using Base = ChebyshevAnisotropicPairPotential<ShapeSymmetryT>;
+
+    py::class_<Pot, Base, std::shared_ptr<Pot>>(m, name.c_str())
+        .def(py::init(
+            [](std::shared_ptr<SystemDefinition> sysdef,
+               std::shared_ptr<NL> nlist,
+               Scalar r_cut,
+               py::array_t<unsigned int, py::array::c_style | py::array::forcecast> terms,
+               py::array_t<Scalar, py::array::c_style | py::array::forcecast> coeffs,
+               py::array_t<Scalar, py::array::c_style | py::array::forcecast> r0_data)
+            {
+                if (terms.ndim() != 2 || terms.shape(1) != Base::num_coordinates)
+                    {
+                    throw std::runtime_error("terms must have shape (Nterms,6).");
+                    }
+
+                const unsigned int Nterms = static_cast<unsigned int>(terms.shape(0));
+
+                if (coeffs.ndim() != 1 || static_cast<unsigned int>(coeffs.shape(0)) != Nterms)
+                    {
+                    throw std::runtime_error("coeffs must have shape (Nterms,).");
+                    }
+
+                if (r0_data.ndim() != Base::num_angle_coordinates)
+                    {
+                    throw std::runtime_error("r0_data must be a 5D array.");
+                    }
+
+                std::array<unsigned int, Base::num_angle_coordinates> r0_shape;
+                for (unsigned int k = 0; k < Base::num_angle_coordinates; ++k)
+                    {
+                    const auto dim = r0_data.shape(k);
+                    if (dim < 2)
+                        {
+                        throw std::runtime_error("r0_data has invalid dimension(s).");
+                        }
+                    r0_shape[k] = static_cast<unsigned int>(dim);
+                    }
+
+                return std::make_shared<Pot>(sysdef,
+                                             nlist,
+                                             r_cut,
+                                             terms.data(),
+                                             coeffs.data(),
+                                             Nterms,
+                                             r0_data.data(),
+                                             r0_shape.data());
+            }));
+    }
+
+    } // end namespace detail
+    } // end namespace azplugins
+    } // end namespace hoomd
+
+#endif // ENABLE_HIP
+#endif // AZPLUGINS_CHEBYSHEV_ANISOTROPIC_PAIR_POTENTIAL_GPU_H_
diff --git a/src/ShapeSymmetry.h b/src/ShapeSymmetry.h
index cd5f37a..8c1158f 100644
--- a/src/ShapeSymmetry.h
+++ b/src/ShapeSymmetry.h
@@ -128,12 +128,20 @@ eulerFromQuat(const quat<Scalar>& q, Scalar& alpha, Scalar& beta, Scalar& gamma)
 
     } // namespace detail
 
+//! Base class for the shared reduced-domain lower bounds (theta, phi, alpha, beta, gamma).
+class ShapeSymmetry
+    {
+    public:
+    static constexpr Scalar domain_lower[5]
+        = {Scalar(0.0), Scalar(1.0e-5), Scalar(0.0), Scalar(1.0e-5), Scalar(0.0)};
+    };
+
 //! Null symmetry: no reduction.
 /*! Full natural domain:
     theta in [0, 2 pi], phi in [1e-5, pi-1e-5], alpha in [0, 2 pi],
     beta in [1e-5, pi-1e-5], gamma in [0, 2 pi].
 */
-class ShapeSymmetryNull
+class ShapeSymmetryNull : public ShapeSymmetry
     {
     public:
     //! Upper bounds of the reduced domain.
@@ -165,7 +173,7 @@ class ShapeSymmetryNull
     theta in [0, pi/4], phi in [1e-5, pi/2], alpha in [0, 2 pi],
     beta in [1e-5, arccos(1/sqrt(3))], gamma in [0, pi/2].
 */
-class ShapeSymmetryCube
+class ShapeSymmetryCube : public ShapeSymmetry
     {
     public:
     //! Upper bounds of the reduced domain.
@@ -280,10 +288,10 @@ class ShapeSymmetryCube
 
 //! Tetrahedron symmetry evaluator.
 /*! Reduced domain:
-    theta in [0, 2 pi/3], phi in [0, pi], alpha in [0, 2 pi],
-    beta in [0, pi], gamma in [0, 2 pi/3].
+    theta in [0, 2 pi/3], phi in [1e-5, pi], alpha in [0, 2 pi],
+    beta in [1e-5, pi-1e-5], gamma in [0, 2 pi/3].
 */
-class ShapeSymmetryTetrahedron
+class ShapeSymmetryTetrahedron : public ShapeSymmetry
     {
     public:
     //! Upper bounds of the reduced domain.
diff --git a/src/export_ChebyshevAnisotropicPairPotentialGPU.cc.inc b/src/export_ChebyshevAnisotropicPairPotentialGPU.cc.inc
new file mode 100644
index 0000000..2b72073
--- /dev/null
+++ b/src/export_ChebyshevAnisotropicPairPotentialGPU.cc.inc
@@ -0,0 +1,39 @@
+// Copyright (c) 2018-2020, Michael P. Howard
+// Copyright (c) 2021-2025, Auburn University
+// Part of azplugins, released under the BSD 3-Clause License.
+
+// Adapted from hoomd/md/export_PotentialPairGPU.cc.inc of HOOMD-blue.
+// Copyright (c) 2009-2026 The Regents of the University of Michigan.
+// Part of HOOMD-blue, released under the BSD 3-Clause License.
+
+// See CMakeLists.txt for the source of these variables to be processed by CMake's
+// configure_file().
+
+#ifdef ENABLE_HIP
+
+// clang-format off
+#include "ChebyshevAnisotropicPairPotentialGPU.h"
+
+#define SYMMETRY_CLASS ShapeSymmetry@_symmetry@
+#define EXPORT_FUNCTION export_ChebyshevAnisotropicPairPotential@_symmetry@GPU
+// clang-format on
+
+namespace hoomd
+    {
+namespace azplugins
+    {
+namespace detail
+    {
+//! Export the GPU potential for the symmetry substituted by CMake's configure_file().
+void EXPORT_FUNCTION(pybind11::module& m)
+    {
+    export_ChebyshevAnisotropicPairPotentialGPU<SYMMETRY_CLASS>(
+        m,
+        "ChebyshevAnisotropicPairPotential@_symmetry@GPU");
+    }
+
+    } // end namespace detail
+    } // end namespace azplugins
+    } // end namespace hoomd
+
+#endif // ENABLE_HIP
diff --git a/src/module.cc b/src/module.cc
index 3ecfcfe..4e75cd7 100644
--- a/src/module.cc
+++ b/src/module.cc
@@ -101,6 +101,9 @@ void export_SphericalHarmonicBarrierGPU(pybind11::module&);
 
 // pair
 void export_AnisoPotentialPairTwoPatchMorseGPU(pybind11::module&);
+void export_ChebyshevAnisotropicPairPotentialNullGPU(pybind11::module&);
+void export_ChebyshevAnisotropicPairPotentialCubeGPU(pybind11::module&);
+void export_ChebyshevAnisotropicPairPotentialTetrahedronGPU(pybind11::module&);
 void export_PotentialPairColloidGPU(pybind11::module&);
 void export_PotentialPairExpandedYukawaGPU(pybind11::module&);
 void export_PotentialPairHertzGPU(pybind11::module&);
@@ -176,6 +179,9 @@ PYBIND11_MODULE(_azplugins, m)
 
     // pair
     export_AnisoPotentialPairTwoPatchMorseGPU(m);
+    export_ChebyshevAnisotropicPairPotentialNullGPU(m);
+    export_ChebyshevAnisotropicPairPotentialCubeGPU(m);
+    export_ChebyshevAnisotropicPairPotentialTetrahedronGPU(m);
     export_PotentialPairColloidGPU(m);
     export_PotentialPairExpandedYukawaGPU(m);
     export_PotentialPairHertzGPU(m);
diff --git a/src/pair.py b/src/pair.py
index bdb22f8..10884ed 100644
--- a/src/pair.py
+++ b/src/pair.py
@@ -5,11 +5,13 @@
 """Pair potentials."""
 
 import numpy
+import hoomd
 from hoomd.azplugins import _azplugins
 from hoomd.data.parameterdicts import ParameterDict, TypeParameterDict
 from hoomd.data.typeparam import TypeParameter
 from hoomd.md import pair
 from hoomd.md.force import Force
+from hoomd.md import _md
 from hoomd.variant import Variant
 
 
@@ -58,7 +60,13 @@ def __init__(self, nlist, terms, coeffs, r0, r_cut):
     def _attach_hook(self):
         self._nlist._attach(self._simulation)
 
-        cls = getattr(self._ext_module, self._cpp_class_name)
+        if isinstance(self._simulation.device, hoomd.device.CPU):
+            cls = getattr(self._ext_module, self._cpp_class_name)
+            self._nlist._cpp_obj.setStorageMode(_md.NeighborList.storageMode.half)
+        else:
+            cls = getattr(self._ext_module, self._cpp_class_name + "GPU")
+            self._nlist._cpp_obj.setStorageMode(_md.NeighborList.storageMode.full)
+
         self._cpp_obj = cls(
             self._simulation.state._cpp_sys_def,
             self._nlist._cpp_obj,
@@ -70,6 +78,9 @@ def _attach_hook(self):
 
         super()._attach_hook()
 
+    def _detach_hook(self):
+        self._nlist._detach()  # undo the attach done in _attach_hook
+
 
 class ChebyshevAnisotropicPairPotentialCube(ChebyshevAnisotropicPairPotential):
     """Chebyshev anisotropic pair potential with cube symmetry reduction.
diff --git a/src/pytest/test_chebyshev.py b/src/pytest/test_chebyshev.py
index 14282f5..c6aa277 100644
--- a/src/pytest/test_chebyshev.py
+++ b/src/pytest/test_chebyshev.py
@@ -2,8 +2,11 @@
 # Copyright (c) 2021-2025, Auburn University
 # Part of azplugins, released under the BSD 3-Clause License.
 
+"""Chebyshev anisotropic pair potential unit tests."""
+
+import collections
+
 import numpy
-from scipy.interpolate import RegularGridInterpolator
 from scipy.spatial.transform import Rotation
 
 import hoomd
@@ -11,126 +14,101 @@
 
 import pytest
 
-# Parameters that are identical across every energy/force/torque test.
-rc = 3.0
-phi_min = 1e-5
-beta_min = 1e-5
 
-terms = numpy.array(
-    [
-        [0, 0, 0, 0, 0, 0],
-        [0, 0, 0, 1, 0, 0],
-        [1, 0, 0, 0, 0, 0],
-        [1, 0, 0, 1, 0, 0],
-    ],
-    dtype=numpy.uint32,
-)
-coeffs = numpy.array([1.0, 0.25, 1.5, -1.0], dtype=numpy.float64)
+_DEVICE_PARAMS = ["cpu"]
 
-# r0 data: shape (3, 2, 3, 2, 3) = 108 values.
-r0_data = numpy.array([1, 2.1, 3.2] * 36, dtype=numpy.float64).reshape(3, 2, 3, 2, 3)
+if hoomd.version.gpu_enabled:
+    try:
+        if len(hoomd.device.GPU.get_available_devices()) > 0:
+            _DEVICE_PARAMS.append("gpu")
+    except Exception:
+        pass
 
 
-def rho_to_r(rho, r0, rc):
-    """Invert rho = (1/r - 1/r0) / (1/(r0+rc) - 1/r0) to recover r."""
-    inv_r0 = 1.0 / r0
-    inv_r0_rc = 1.0 / (r0 + rc)
-    inv_r = rho * (inv_r0_rc - inv_r0) + inv_r0
-    return 1.0 / inv_r
+@pytest.fixture(params=_DEVICE_PARAMS)
+def simulation_factory(request):
+    """Create a Simulation on CPU, and on GPU when available."""
 
+    def make_simulation(snapshot):
+        if request.param == "cpu":
+            device = hoomd.device.CPU()
+        else:
+            device = hoomd.device.GPU()
 
-def build_simulation(
-    simulation_factory,
-    two_particle_snapshot_factory,
-    pot_cls,
-    r0,
-    rho,
-    theta,
-    phi,
-    alpha,
-    beta,
-    gamma,
-):
-    """Place two particles at the prescribed coordinates and
-    return the attached potential.
-
-    Particle 0 sits at the origin with identity orientation.  Particle 1
-    is placed so that the C++ code sees (rho, theta, phi, alpha, beta,
-    gamma) as the pair's generalised coordinates.  For Null symmetry the
-    caller supplies ``r0`` from the test's own interpolator; for Cube /
-    Tetrahedron tests the caller supplies the reduced-frame ``r0``
-    directly because the input angles need not to be in the reduced
-    coordinates which interpolator expects.
-    """
-    snap = two_particle_snapshot_factory()
-    if snap.communicator.rank == 0:
-        r = rho_to_r(rho, r0, rc)
+        sim = hoomd.Simulation(device=device, seed=1)
+        sim.create_state_from_snapshot(snapshot)
+        return sim
 
-        dx = r * numpy.sin(phi) * numpy.cos(theta)
-        dy = r * numpy.sin(phi) * numpy.sin(theta)
-        dz = r * numpy.cos(phi)
+    return make_simulation
 
-        q_j = Rotation.from_euler("ZXZ", [alpha, beta, gamma]).as_quat(
-            scalar_first=True
-        )
 
-        snap.particles.position[0] = [0.0, 0.0, 0.0]
-        snap.particles.position[1] = [-dx, -dy, -dz]
-        snap.particles.orientation[0] = [1, 0, 0, 0]
-        snap.particles.orientation[1] = q_j
-        snap.particles.moment_inertia[:] = [0.1, 0.1, 0.1]
+@pytest.fixture
+def two_particle_snapshot_factory():
+    """Create a basic 2-particle snapshot for pair-potential tests."""
 
-    sim = simulation_factory(snap)
+    def make_snapshot():
+        snap = hoomd.Snapshot()
 
-    integrator = hoomd.md.Integrator(dt=0.001)
-    integrator.methods = [hoomd.md.methods.ConstantVolume(hoomd.filter.All())]
+        if snap.communicator.rank == 0:
+            snap.configuration.box = [20, 20, 20, 0, 0, 0]
+            snap.particles.N = 2
+            snap.particles.types = ["A"]
+            snap.particles.typeid[:] = [0, 0]
+            snap.particles.position[:] = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
+            snap.particles.orientation[:] = [[1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0]]
+            snap.particles.moment_inertia[:] = [0.0, 0.0, 0.0]
 
-    nlist = hoomd.md.nlist.Cell(buffer=1)
-    pot = pot_cls(
-        nlist=nlist,
-        terms=terms,
-        coeffs=coeffs,
-        r0=r0_data,
-        r_cut=rc,
+        return snap
+
+    return make_snapshot
+
+
+def good_kwargs():
+    """A set of constructor kwargs known to be valid."""
+    return dict(
+        nlist=hoomd.md.nlist.Cell(buffer=0.4),
+        terms=numpy.zeros((1, 6), dtype=numpy.uint32),
+        coeffs=numpy.zeros((1,), dtype=numpy.float64),
+        r0=numpy.zeros((2, 2, 2, 2, 2), dtype=numpy.float64),
+        r_cut=3.0,
     )
 
-    integrator.forces = [pot]
-    sim.operations.integrator = integrator
-    sim.run(0)
-    return sim, pot
 
+def test_chebyshev_invalid_terms_shape():
+    """Raise ValueError when ``terms`` is not (Nterms, 6)."""
+    kwargs = good_kwargs()
+    kwargs["terms"] = numpy.zeros((1, 5), dtype=numpy.uint32)
+    kwargs["coeffs"] = numpy.zeros((1,), dtype=numpy.float64)
+    with pytest.raises(ValueError, match=r"terms must have shape \(Nterms, 6\)\."):
+        hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential(**kwargs)
 
-def check_pair(sim, pot, expected_energy, expected_force, expected_torque):
-    """Compare the C++ output on both particles."""
-    if sim.device.communicator.rank == 0:
-        half_e = 0.5 * expected_energy
-
-        numpy.testing.assert_allclose(pot.energies[0], half_e, atol=1e-3, rtol=1e-3)
-        numpy.testing.assert_allclose(
-            pot.forces[0], expected_force, atol=1e-3, rtol=1e-3
-        )
-        numpy.testing.assert_allclose(
-            pot.torques[0], expected_torque, atol=1e-3, rtol=1e-3
-        )
-
-        numpy.testing.assert_allclose(pot.energies[1], half_e, atol=1e-3, rtol=1e-3)
-        numpy.testing.assert_allclose(
-            pot.forces[1], -expected_force, atol=1e-3, rtol=1e-3
-        )
-        numpy.testing.assert_allclose(
-            pot.torques[1], -expected_torque, atol=1e-3, rtol=1e-3
-        )
-
-
-def check_zero_pair(sim, pot):
-    """Assert that both particles have zero force, torque, and energy."""
-    if sim.device.communicator.rank == 0:
-        numpy.testing.assert_allclose(pot.energies[0], 0.0, atol=1e-10)
-        numpy.testing.assert_allclose(pot.forces[0], [0.0, 0.0, 0.0], atol=1e-10)
-        numpy.testing.assert_allclose(pot.torques[0], [0.0, 0.0, 0.0], atol=1e-10)
-        numpy.testing.assert_allclose(pot.energies[1], 0.0, atol=1e-10)
-        numpy.testing.assert_allclose(pot.forces[1], [0.0, 0.0, 0.0], atol=1e-10)
-        numpy.testing.assert_allclose(pot.torques[1], [0.0, 0.0, 0.0], atol=1e-10)
+
+def test_chebyshev_invalid_coeffs_shape():
+    """Raise ValueError when ``coeffs`` length does not match Nterms."""
+    kwargs = good_kwargs()
+    kwargs["terms"] = numpy.zeros((2, 6), dtype=numpy.uint32)
+    kwargs["coeffs"] = numpy.zeros((1,), dtype=numpy.float64)
+    with pytest.raises(ValueError, match=r"coeffs must have shape \(Nterms,\)"):
+        hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential(**kwargs)
+
+
+def test_chebyshev_invalid_r0_ndim():
+    """Raise ValueError when ``r0`` is not a 5D array."""
+    kwargs = good_kwargs()
+    kwargs["r0"] = numpy.zeros((2, 2, 2, 2), dtype=numpy.float64)
+    with pytest.raises(ValueError, match=r"r0 must be a 5D array\."):
+        hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential(**kwargs)
+
+
+def test_chebyshev_invalid_r0_shape():
+    """Raise ValueError when ``r0`` has a dimension with less than 2 points."""
+    kwargs = good_kwargs()
+    kwargs["r0"] = numpy.zeros((2, 2, 1, 2, 2), dtype=numpy.float64)
+    with pytest.raises(
+        ValueError,
+        match=r"r0 must have at least 2 grid points along each of its 5 dimensions\.",
+    ):
+        hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential(**kwargs)
 
 
 def test_chebyshev_construct_attach_zero(
@@ -190,506 +168,469 @@ def test_chebyshev_construct_attach_zero(
         numpy.testing.assert_array_equal(pot.energies, numpy.zeros((2,)))
 
 
-def good_kwargs():
-    """A set of constructor kwargs known to be valid."""
-    return dict(
-        nlist=hoomd.md.nlist.Cell(buffer=0.4),
-        terms=numpy.zeros((1, 6), dtype=numpy.uint32),
-        coeffs=numpy.zeros((1,), dtype=numpy.float64),
-        r0=numpy.zeros((2, 2, 2, 2, 2), dtype=numpy.float64),
-        r_cut=3.0,
-    )
-
-
-def test_chebyshev_invalid_terms_shape():
-    """Raise ValueError when ``terms`` is not (Nterms, 6)."""
-    kwargs = good_kwargs()
-    kwargs["terms"] = numpy.zeros((1, 5), dtype=numpy.uint32)
-    kwargs["coeffs"] = numpy.zeros((1,), dtype=numpy.float64)
-    with pytest.raises(ValueError, match=r"terms must have shape \(Nterms, 6\)\."):
-        hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential(**kwargs)
-
-
-def test_chebyshev_invalid_coeffs_shape():
-    """Raise ValueError when ``coeffs`` length does not match Nterms."""
-    kwargs = good_kwargs()
-    kwargs["terms"] = numpy.zeros((2, 6), dtype=numpy.uint32)
-    kwargs["coeffs"] = numpy.zeros((1,), dtype=numpy.float64)
-    with pytest.raises(ValueError, match=r"coeffs must have shape \(Nterms,\)"):
-        hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential(**kwargs)
-
+# Shared by every case: cutoff rc and the offsets keeping phi/beta off 0 and pi.
+rc = 3.0
+phi_min = 1e-5
+beta_min = 1e-5
 
-def test_chebyshev_invalid_r0_ndim():
-    """Raise ValueError when ``r0`` is not a 5D array."""
-    kwargs = good_kwargs()
-    kwargs["r0"] = numpy.zeros((2, 2, 2, 2), dtype=numpy.float64)
-    with pytest.raises(ValueError, match=r"r0 must be a 5D array\."):
-        hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential(**kwargs)
+terms = numpy.array(
+    [
+        [0, 0, 0, 0, 0, 0],
+        [0, 0, 0, 1, 0, 0],
+        [1, 0, 0, 0, 0, 0],
+        [1, 0, 0, 1, 0, 0],
+    ],
+    dtype=numpy.uint32,
+)
+coeffs = numpy.array([1.0, 0.25, 1.5, -1.0], dtype=numpy.float64)
 
+# r0 table of shape (3, 2, 3, 2, 3) = 108 values; only the last axis varies.
+r0_data = numpy.array([1, 2.1, 3.2] * 36, dtype=numpy.float64).reshape(3, 2, 3, 2, 3)
 
-def test_chebyshev_invalid_r0_shape():
-    """Raise ValueError when ``r0`` has a dimension with less than 2 points."""
-    kwargs = good_kwargs()
-    kwargs["r0"] = numpy.zeros((2, 2, 1, 2, 2), dtype=numpy.float64)
-    with pytest.raises(
-        ValueError,
-        match=r"r0 must have at least 2 grid points along each of its 5 dimensions\.",
-    ):
-        hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential(**kwargs)
+# Description of one two-particle reference evaluation. Fields, in order:
+#   name        - identifier used as the pytest id
+#   potential   - potential class under test
+#   r0          - r0 used to convert rho into a separation distance
+#   rho         - reduced radial coordinate (see rho_to_r)
+#   theta       - azimuthal angle of the separation direction
+#   phi         - polar angle of the separation direction
+#   alpha       - first ZXZ Euler angle of the second particle
+#   beta        - second ZXZ Euler angle of the second particle
+#   gamma       - third ZXZ Euler angle of the second particle
+#   energy      - expected pair energy
+#   force       - expected force on the first particle
+#   torque      - expected torque on the first particle
+#   zero_output - True when energy, force, and torque must all vanish
+PotentialTestCase = collections.namedtuple(
+    "PotentialTestCase",
+    "name potential r0 rho theta phi alpha beta gamma energy force torque zero_output",
+)
 
+potential_tests = []
 
 # Null symmetry
-def test_chebyshev_force_torque_energy_null_symmetry(
-    simulation_factory, two_particle_snapshot_factory
-):
-    """Force, torque, and energy with no symmetry reduction."""
-    # r0 interpolator aligned with r0_data's shape (3, 2, 3, 2, 3).
-    theta_grid = numpy.linspace(0, 2 * numpy.pi, 3)
-    phi_grid = numpy.linspace(phi_min, numpy.pi - phi_min, 2)
-    alpha_grid = numpy.linspace(0, 2 * numpy.pi, 3)
-    beta_grid = numpy.linspace(beta_min, numpy.pi - beta_min, 2)
-    gamma_grid = numpy.linspace(0, 2 * numpy.pi, 3)
-
-    r0_interp = RegularGridInterpolator(
-        (theta_grid, phi_grid, alpha_grid, beta_grid, gamma_grid),
-        r0_data,
-        method="linear",
-        bounds_error=False,
-        fill_value=numpy.nan,
-    )
-
-    def run(rho, theta, phi, alpha, beta, gamma):
-        r0 = float(r0_interp(numpy.array([theta, phi, alpha, beta, gamma]))[0])
-        return build_simulation(
-            simulation_factory,
-            two_particle_snapshot_factory,
-            hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential,
-            r0,
-            rho,
-            theta,
-            phi,
-            alpha,
-            beta,
-            gamma,
-        )
-
-    # point 1: interior
-    sim, pot = run(
-        rho=0.2,
-        theta=numpy.pi / 4,
-        phi=numpy.pi / 4,
-        alpha=2 * numpy.pi / 5,
-        beta=numpy.pi / 2,
-        gamma=numpy.pi,
-    )
-    check_pair(
-        sim,
-        pot,
-        expected_energy=-0.41,
-        expected_force=numpy.array([-1.324, -1.324, -1.872]),
-        expected_torque=numpy.array([0.944, -0.307, -0.271]),
-    )
-
-    # point 2: rho < 0 (clamped for derivatives, extrapolated for energy)
-    sim, pot = run(
-        rho=-0.1,
-        theta=numpy.pi / 4,
-        phi=numpy.pi / 4,
-        alpha=2 * numpy.pi / 5,
-        beta=numpy.pi / 2,
-        gamma=numpy.pi,
-    )
-    check_pair(
-        sim,
-        pot,
-        expected_energy=-1.67,
-        expected_force=numpy.array([-1.906, -1.906, -2.695]),
-        expected_torque=numpy.array([1.226, -0.398, -0.398]),
-    )
-
-    # point 3: phi at upper boundary
-    sim, pot = run(
-        rho=0.0,
-        theta=numpy.pi / 4,
-        phi=numpy.pi - phi_min,
-        alpha=2 * numpy.pi / 15,
-        beta=numpy.pi / 2,
-        gamma=numpy.pi,
-    )
-    check_pair(
-        sim,
-        pot,
-        expected_energy=-1.583,
-        expected_force=numpy.array([0.0, 0.0, 3.832]),
-        expected_torque=numpy.array([0.546, -1.226, -0.398]),
-    )
-
-    # point 4: beta at lower boundary
-    sim, pot = run(
-        rho=0.2,
-        theta=numpy.pi / 4,
-        phi=numpy.pi / 4,
-        alpha=2 * numpy.pi / 5,
-        beta=beta_min,
-        gamma=numpy.pi,
-    )
-    check_pair(
-        sim,
-        pot,
-        expected_energy=-0.41,
-        expected_force=numpy.array([-1.324, -1.324, -1.872]),
-        expected_torque=numpy.array([120148.0, -39038.6, -0.271]),
-    )
-
-    # point 5: interior with rho near 1
-    sim, pot = run(
-        rho=0.95,
-        theta=numpy.pi / 4,
-        phi=numpy.pi / 6,
-        alpha=2 * numpy.pi / 5,
-        beta=numpy.pi / 2,
-        gamma=numpy.pi / 8,
-    )
-    check_pair(
-        sim,
-        pot,
-        expected_energy=2.74,
-        expected_force=numpy.array([-0.174, -0.174, -0.427]),
-        expected_torque=numpy.array([0.207, -0.067, 0.207]),
-    )
-
-    # point 6: rho > 1, beyond surface cutoff - all zeros
-    sim, pot = run(
-        rho=1.05,
-        theta=numpy.pi / 4,
-        phi=numpy.pi / 6,
-        alpha=2 * numpy.pi / 5,
-        beta=numpy.pi / 2,
-        gamma=numpy.pi / 8,
-    )
-    check_zero_pair(sim, pot)
-
+potential_tests += [
+    PotentialTestCase(
+        name="null_point_1",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential,
+        r0=2.1,
+        rho=0.2,
+        theta=numpy.pi / 4,
+        phi=numpy.pi / 4,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 2,
+        gamma=numpy.pi,
+        energy=-0.41,
+        force=(-1.324, -1.324, -1.872),
+        torque=(0.944, -0.307, -0.271),
+        zero_output=False,
+    ),
+    PotentialTestCase(
+        name="null_point_2",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential,
+        r0=2.1,
+        rho=-0.1,
+        theta=numpy.pi / 4,
+        phi=numpy.pi / 4,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 2,
+        gamma=numpy.pi,
+        energy=-1.67,
+        force=(-1.906, -1.906, -2.695),
+        torque=(1.226, -0.398, -0.398),
+        zero_output=False,
+    ),
+    PotentialTestCase(
+        name="null_point_3",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential,
+        r0=2.1,
+        rho=0.0,
+        theta=numpy.pi / 4,
+        phi=numpy.pi - phi_min,
+        alpha=2 * numpy.pi / 15,
+        beta=numpy.pi / 2,
+        gamma=numpy.pi,
+        energy=-1.583,
+        force=(0.0, 0.0, 3.832),
+        torque=(0.546, -1.226, -0.398),
+        zero_output=False,
+    ),
+    PotentialTestCase(
+        name="null_point_4",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential,
+        r0=2.1,
+        rho=0.2,
+        theta=numpy.pi / 4,
+        phi=numpy.pi / 4,
+        alpha=2 * numpy.pi / 5,
+        beta=beta_min,
+        gamma=numpy.pi,
+        energy=-0.41,
+        force=(-1.324, -1.324, -1.872),
+        torque=(120148.0, -39038.6, -0.271),
+        zero_output=False,
+    ),
+    PotentialTestCase(
+        name="null_point_5",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential,
+        r0=1.1375,
+        rho=0.95,
+        theta=numpy.pi / 4,
+        phi=numpy.pi / 6,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 2,
+        gamma=numpy.pi / 8,
+        energy=2.74,
+        force=(-0.174, -0.174, -0.427),
+        torque=(0.207, -0.067, 0.207),
+        zero_output=False,
+    ),
+    PotentialTestCase(
+        name="null_point_6",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotential,
+        r0=1.1375,
+        rho=1.05,
+        theta=numpy.pi / 4,
+        phi=numpy.pi / 6,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 2,
+        gamma=numpy.pi / 8,
+        energy=0.0,
+        force=None,
+        torque=None,
+        zero_output=True,
+    ),
+]
 
 # Cube symmetry
-def test_chebyshev_force_torque_energy_cube_symmetry(
-    simulation_factory, two_particle_snapshot_factory
-):
-    """Force, torque, and energy with cube symmetry reduction.
-
-    Reduced domain: theta in [0, pi/4], phi in [1e-5, pi/2],
-    alpha in [0, 2 pi], beta in [1e-5, arccos(1/sqrt(3))],
-    gamma in [0, pi/2]."""
-
-    def run(r0, rho, theta, phi, alpha, beta, gamma):
-        return build_simulation(
-            simulation_factory,
-            two_particle_snapshot_factory,
-            hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialCube,
-            r0,
-            rho,
-            theta,
-            phi,
-            alpha,
-            beta,
-            gamma,
-        )
-
-    # point 1: interior
-    sim, pot = run(
-        r0=2.46666667,
-        rho=0.2,
-        theta=numpy.pi / 8,
-        phi=numpy.pi / 5,
-        alpha=2 * numpy.pi / 5,
-        beta=numpy.pi / 6,
-        gamma=numpy.pi / 3,
-    )
-    check_pair(
-        sim,
-        pot,
-        expected_energy=-0.41,
-        expected_force=numpy.array([-1.335, -0.553, -1.989]),
-        expected_torque=numpy.array([7.395, -2.403, -0.271]),
-    )
-
-    # point 2: rho < 0 (clamped for derivatives, extrapolated for energy)
-    sim, pot = run(
-        r0=2.46666667,
-        rho=-0.1,
-        theta=numpy.pi / 8,
-        phi=numpy.pi / 5,
-        alpha=2 * numpy.pi / 5,
-        beta=numpy.pi / 6,
-        gamma=numpy.pi / 3,
-    )
-    check_pair(
-        sim,
-        pot,
-        expected_energy=-1.67,
-        expected_force=numpy.array([-1.875, -0.777, -2.793]),
-        expected_torque=numpy.array([9.579, -3.113, -0.398]),
-    )
-
-    # point 3: interior with rho=0
-    sim, pot = run(
-        r0=2.62072583,
-        rho=0.0,
-        theta=2 * numpy.pi / 7,
-        phi=numpy.pi / 9,
-        alpha=2 * numpy.pi / 15,
-        beta=numpy.pi / 4,
-        gamma=numpy.pi / 5,
-    )
-    check_pair(
-        sim,
-        pot,
-        expected_energy=-0.751,
-        expected_force=numpy.array([-0.518, -0.65, -2.285]),
-        expected_torque=numpy.array([4.223, -0.398, 3.08]),
-    )
-
-    # point 4: theta, phi, and beta out of bound
-    sim, pot = run(
-        r0=2.11254315,
-        rho=0.0,
-        theta=2 * numpy.pi / 7,
-        phi=2 * numpy.pi / 3,
-        alpha=2 * numpy.pi / 15,
-        beta=numpy.pi / 3,
-        gamma=numpy.pi / 5,
-    )
-    check_pair(
-        sim,
-        pot,
-        expected_energy=-0.427,
-        expected_force=numpy.array([-1.256, -1.575, 1.163]),
-        expected_torque=numpy.array([4.872, -0.906, 0.398]),
-    )
-
-    # point 5: beta at lower boundary, gamma outside the domain
-    sim, pot = run(
-        r0=1.0,
-        rho=0.2,
-        theta=numpy.pi / 4,
-        phi=numpy.pi / 4,
-        alpha=2 * numpy.pi / 5,
-        beta=beta_min,
-        gamma=numpy.pi,
-    )
-    check_pair(
-        sim,
-        pot,
-        expected_energy=-0.41,
-        expected_force=numpy.array([-2.023, -2.023, -2.861]),
-        expected_torque=numpy.array([6.31798953e05, -2.05283924e05, -0.271]),
-    )
+potential_tests += [
+    PotentialTestCase(
+        name="cube_point_1",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialCube,
+        r0=2.46666667,
+        rho=0.2,
+        theta=numpy.pi / 8,
+        phi=numpy.pi / 5,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 6,
+        gamma=numpy.pi / 3,
+        energy=-0.41,
+        force=(-1.335, -0.553, -1.989),
+        torque=(7.395, -2.403, -0.271),
+        zero_output=False,
+    ),
+    PotentialTestCase(
+        name="cube_point_2",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialCube,
+        r0=2.46666667,
+        rho=-0.1,
+        theta=numpy.pi / 8,
+        phi=numpy.pi / 5,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 6,
+        gamma=numpy.pi / 3,
+        energy=-1.67,
+        force=(-1.875, -0.777, -2.793),
+        torque=(9.579, -3.113, -0.398),
+        zero_output=False,
+    ),
+    PotentialTestCase(
+        name="cube_point_3",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialCube,
+        r0=2.62072583,
+        rho=0.0,
+        theta=2 * numpy.pi / 7,
+        phi=numpy.pi / 9,
+        alpha=2 * numpy.pi / 15,
+        beta=numpy.pi / 4,
+        gamma=numpy.pi / 5,
+        energy=-0.751,
+        force=(-0.518, -0.65, -2.285),
+        torque=(4.223, -0.398, 3.08),
+        zero_output=False,
+    ),
+    PotentialTestCase(
+        name="cube_point_4",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialCube,
+        r0=2.11254315,
+        rho=0.0,
+        theta=2 * numpy.pi / 7,
+        phi=2 * numpy.pi / 3,
+        alpha=2 * numpy.pi / 15,
+        beta=numpy.pi / 3,
+        gamma=numpy.pi / 5,
+        energy=-0.427,
+        force=(-1.256, -1.575, 1.163),
+        torque=(4.872, -0.906, 0.398),
+        zero_output=False,
+    ),
+    PotentialTestCase(
+        name="cube_point_5",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialCube,
+        r0=1.0,
+        rho=0.2,
+        theta=numpy.pi / 4,
+        phi=numpy.pi / 4,
+        alpha=2 * numpy.pi / 5,
+        beta=beta_min,
+        gamma=numpy.pi,
+        energy=-0.41,
+        force=(-2.023, -2.023, -2.861),
+        torque=(6.31798953e05, -2.05283924e05, -0.271),
+        zero_output=False,
+    ),
+    PotentialTestCase(
+        name="cube_point_6",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialCube,
+        r0=1.0,
+        rho=0.95,
+        theta=numpy.pi,
+        phi=2 * numpy.pi / 3,
+        alpha=2 * numpy.pi / 5,
+        beta=2 * numpy.pi / 3,
+        gamma=2 * numpy.pi,
+        energy=2.61,
+        force=(0.363, 0.0, 0.209),
+        torque=(-1.135, 0.369, -0.207),
+        zero_output=False,
+    ),
+    PotentialTestCase(
+        name="cube_point_7",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialCube,
+        r0=1.0,
+        rho=0.95,
+        theta=0.0,
+        phi=1.0471975511965979,
+        alpha=1.8849555921538759,
+        beta=0.5235987755982987,
+        gamma=0.0,
+        energy=2.61,
+        force=(-0.363, -0.0, -0.209),
+        torque=(1.135, 0.369, 0.207),
+        zero_output=False,
+    ),
+    PotentialTestCase(
+        name="cube_point_8",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialCube,
+        r0=1.0,
+        rho=1.05,
+        theta=numpy.pi / 4,
+        phi=numpy.pi / 6,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 2,
+        gamma=numpy.pi / 8,
+        energy=0.0,
+        force=None,
+        torque=None,
+        zero_output=True,
+    ),
+]
 
-    # point 6: all angles outside the domain (except alpha)
-    sim, pot = run(
-        r0=1.0,
-        rho=0.95,
-        theta=numpy.pi,
-        phi=2 * numpy.pi / 3,
-        alpha=2 * numpy.pi / 5,
-        beta=2 * numpy.pi / 3,
-        gamma=2 * numpy.pi,
-    )
-    check_pair(
-        sim,
-        pot,
-        expected_energy=2.61,
-        expected_force=numpy.array([0.363, 0.0, 0.209]),
-        expected_torque=numpy.array([-1.135, 0.369, -0.207]),
-    )
+# Tetrahedron symmetry
+potential_tests += [
+    PotentialTestCase(
+        name="tetrahedron_point_1",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialTetrahedron,
+        r0=2.1,
+        rho=0.2,
+        theta=numpy.pi / 8,
+        phi=numpy.pi / 5,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 6,
+        gamma=numpy.pi / 3,
+        energy=-0.41,
+        force=(-1.437, -0.595, -2.142),
+        torque=(6.111, -1.985, -0.271),
+        zero_output=False,
+    ),
+    PotentialTestCase(
+        name="tetrahedron_point_2",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialTetrahedron,
+        r0=2.1,
+        rho=-0.1,
+        theta=numpy.pi / 8,
+        phi=numpy.pi / 5,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 6,
+        gamma=numpy.pi / 3,
+        energy=-1.67,
+        force=(-2.07, -0.857, -3.084),
+        torque=(8.013, -2.604, -0.398),
+        zero_output=False,
+    ),
+    PotentialTestCase(
+        name="tetrahedron_point_3",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialTetrahedron,
+        r0=1.66,
+        rho=0.0,
+        theta=3 * numpy.pi / 2,
+        phi=phi_min,
+        alpha=2 * numpy.pi / 15,
+        beta=numpy.pi / 4,
+        gamma=numpy.pi / 5,
+        energy=-0.75,
+        force=(0.0, 0.0, -3.182),
+        torque=(2.084, -4.680, -0.398),
+        zero_output=False,
+    ),
+    PotentialTestCase(
+        name="tetrahedron_point_4",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialTetrahedron,
+        r0=2.1,
+        rho=0.65,
+        theta=numpy.pi / 4,
+        phi=numpy.pi / 4,
+        alpha=2 * numpy.pi / 5,
+        beta=beta_min,
+        gamma=numpy.pi,
+        energy=1.48,
+        force=(-0.649, -0.649, -0.917),
+        torque=(1.54802229e05, -5.02982930e04, 0.016),
+        zero_output=False,
+    ),
+    PotentialTestCase(
+        name="tetrahedron_point_5",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialTetrahedron,
+        r0=2.1,
+        rho=0.2,
+        theta=numpy.pi / 4,
+        phi=numpy.pi - phi_min,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi - beta_min,
+        gamma=numpy.pi,
+        energy=-0.41,
+        force=(0.0, 0.0, 2.647),
+        torque=(2.57516972e05, -8.36723360e04, -0.271),
+        zero_output=False,
+    ),
+    PotentialTestCase(
+        name="tetrahedron_point_6",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialTetrahedron,
+        r0=1.0,
+        rho=0.95,
+        theta=numpy.pi,
+        phi=2 * numpy.pi / 3,
+        alpha=2 * numpy.pi / 5,
+        beta=2 * numpy.pi / 3,
+        gamma=2 * numpy.pi,
+        energy=1.873,
+        force=(0.146, 0.0, 0.084),
+        torque=(0.371, -0.120, 0.207),
+        zero_output=False,
+    ),
+    PotentialTestCase(
+        name="tetrahedron_point_7",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialTetrahedron,
+        r0=1.0,
+        rho=0.95,
+        theta=1.0471975511965979,
+        phi=2.0943951023931953,
+        alpha=5.445427266222309,
+        beta=2.0943951023931953,
+        gamma=0.0,
+        energy=1.873,
+        force=(-0.073, -0.127, 0.084),
+        torque=(-0.29, -0.261, 0.207),
+        zero_output=False,
+    ),
+    PotentialTestCase(
+        name="tetrahedron_point_8",
+        potential=hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialTetrahedron,
+        r0=1.0,
+        rho=1.05,
+        theta=numpy.pi / 4,
+        phi=numpy.pi / 6,
+        alpha=2 * numpy.pi / 5,
+        beta=numpy.pi / 2,
+        gamma=numpy.pi / 8,
+        energy=0.0,
+        force=None,
+        torque=None,
+        zero_output=True,
+    ),
+]
 
-    # point 7: equivalent to point 6 but already in the reduced domain
-    sim, pot = run(
-        r0=1.0,
-        rho=0.95,
-        theta=0.0,
-        phi=1.0471975511965979,
-        alpha=1.8849555921538759,
-        beta=0.5235987755982987,
-        gamma=0.0,
-    )
-    check_pair(
-        sim,
-        pot,
-        expected_energy=2.61,
-        expected_force=numpy.array([-0.363, -0.0, -0.209]),
-        expected_torque=numpy.array([1.135, 0.369, 0.207]),
-    )
 
-    # point 8: rho > 1, beyond surface cutoff - all zeros
-    sim, pot = run(
-        r0=1.0,
-        rho=1.05,
-        theta=numpy.pi / 4,
-        phi=numpy.pi / 6,
-        alpha=2 * numpy.pi / 5,
-        beta=numpy.pi / 2,
-        gamma=numpy.pi / 8,
-    )
-    check_zero_pair(sim, pot)
+def rho_to_r(rho, r0, rc):
+    """Return the r that satisfies rho = (1/r - 1/r0) / (1/(r0+rc) - 1/r0)."""
+    near = 1.0 / r0
+    far = 1.0 / (r0 + rc)
+    # rho is linear in 1/r: 0 maps to r0 and 1 maps to r0 + rc
+    return 1.0 / (rho * (far - near) + near)
 
 
-# Tetrahedron symmetry
-def test_chebyshev_force_torque_energy_tetrahedron_symmetry(
-    simulation_factory, two_particle_snapshot_factory
+@pytest.mark.parametrize("potential_test", potential_tests, ids=lambda x: x.name)
+def test_energy_force_and_torque(
+    simulation_factory, two_particle_snapshot_factory, potential_test
 ):
-    """Force, torque, and energy with tetrahedron symmetry reduction.
-
-    Reduced domain: theta in [0, 2 pi/3], phi in [1e-5, pi],
-    alpha in [0, 2 pi], beta in [1e-5, pi], gamma in [0, 2 pi/3]."""
-
-    def run(r0, rho, theta, phi, alpha, beta, gamma):
-        return build_simulation(
-            simulation_factory,
-            two_particle_snapshot_factory,
-            hoomd.azplugins.pair.ChebyshevAnisotropicPairPotentialTetrahedron,
-            r0,
-            rho,
-            theta,
-            phi,
-            alpha,
-            beta,
-            gamma,
-        )
-
-    # point 1: interior
-    sim, pot = run(
-        r0=2.1,
-        rho=0.2,
-        theta=numpy.pi / 8,
-        phi=numpy.pi / 5,
-        alpha=2 * numpy.pi / 5,
-        beta=numpy.pi / 6,
-        gamma=numpy.pi / 3,
-    )
-    check_pair(
-        sim,
-        pot,
-        expected_energy=-0.41,
-        expected_force=numpy.array([-1.437, -0.595, -2.142]),
-        expected_torque=numpy.array([6.111, -1.985, -0.271]),
-    )
+    """Check energy, force, and torque for one two-particle reference case.
+
+    Cases flagged ``zero_output`` must give zero energy, force, and torque.
+    """
+    snap = two_particle_snapshot_factory()
+    if snap.communicator.rank == 0:
+        # recover the center-to-center distance from the reduced coordinate rho
+        r = rho_to_r(potential_test.rho, potential_test.r0, rc)
+
-    # point 2: rho < 0 (clamped for derivatives, extrapolated for energy)
-    sim, pot = run(
-        r0=2.1,
-        rho=-0.1,
-        theta=numpy.pi / 8,
-        phi=numpy.pi / 5,
-        alpha=2 * numpy.pi / 5,
-        beta=numpy.pi / 6,
-        gamma=numpy.pi / 3,
-    )
-    check_pair(
-        sim,
-        pot,
-        expected_energy=-1.67,
-        expected_force=numpy.array([-2.07, -0.857, -3.084]),
-        expected_torque=numpy.array([8.013, -2.604, -0.398]),
-    )
+        # spherical (r, theta, phi) -> Cartesian components of the separation
+        dx = r * numpy.sin(potential_test.phi) * numpy.cos(potential_test.theta)
+        dy = r * numpy.sin(potential_test.phi) * numpy.sin(potential_test.theta)
+        dz = r * numpy.cos(potential_test.phi)
+
-    # point 3: phi at the boundary and theta is outside the domain
-    sim, pot = run(
-        r0=1.66,
-        rho=0.0,
-        theta=3 * numpy.pi / 2,
-        phi=phi_min,
-        alpha=2 * numpy.pi / 15,
-        beta=numpy.pi / 4,
-        gamma=numpy.pi / 5,
-    )
-    check_pair(
-        sim,
-        pot,
-        expected_energy=-0.75,
-        expected_force=numpy.array([0.0, 0.0, -3.182]),
-        expected_torque=numpy.array([2.084, -4.680, -0.398]),
-    )
+        # orientation of particle 1 from ZXZ Euler angles (scalar-first quaternion)
+        q_j = Rotation.from_euler(
+            "ZXZ",
+            [potential_test.alpha, potential_test.beta, potential_test.gamma],
+        ).as_quat(scalar_first=True)
+
-    # point 4: beta at the bound and gamma out of bound
-    sim, pot = run(
-        r0=2.1,
-        rho=0.65,
-        theta=numpy.pi / 4,
-        phi=numpy.pi / 4,
-        alpha=2 * numpy.pi / 5,
-        beta=beta_min,
-        gamma=numpy.pi,
-    )
-    check_pair(
-        sim,
-        pot,
-        expected_energy=1.48,
-        expected_force=numpy.array([-0.649, -0.649, -0.917]),
-        expected_torque=numpy.array([1.54802229e05, -5.02982930e04, 0.016]),
-    )
+        # particle 0 sits at the origin unrotated; particle 1 is at -(dx, dy, dz)
+        snap.particles.position[:] = [[0.0, 0.0, 0.0], [-dx, -dy, -dz]]
+        snap.particles.orientation[:] = [[1, 0, 0, 0], q_j]
+        snap.particles.moment_inertia[:] = [0.1, 0.1, 0.1]
+
-    # point 5: phi and beta at boundary, gamma outside the domain
-    sim, pot = run(
-        r0=2.1,
-        rho=0.2,
-        theta=numpy.pi / 4,
-        phi=numpy.pi - phi_min,
-        alpha=2 * numpy.pi / 5,
-        beta=numpy.pi - beta_min,
-        gamma=numpy.pi,
-    )
-    check_pair(
-        sim,
-        pot,
-        expected_energy=-0.41,
-        expected_force=numpy.array([0.0, 0.0, 2.647]),
-        expected_torque=numpy.array([2.57516972e05, -8.36723360e04, -0.271]),
-    )
+    sim = simulation_factory(snap)
+
-    # point 6: all angles outside the domain (except alpha)
-    sim, pot = run(
-        r0=1.0,
-        rho=0.95,
-        theta=numpy.pi,
-        phi=2 * numpy.pi / 3,
-        alpha=2 * numpy.pi / 5,
-        beta=2 * numpy.pi / 3,
-        gamma=2 * numpy.pi,
-    )
-    check_pair(
-        sim,
-        pot,
-        expected_energy=1.873,
-        expected_force=numpy.array([0.146, 0.0, 0.084]),
-        expected_torque=numpy.array([0.371, -0.120, 0.207]),
-    )
+    integrator = hoomd.md.Integrator(dt=0.001)
+    nve = hoomd.md.methods.ConstantVolume(hoomd.filter.All())
+    integrator.methods = [nve]
+
-    # point 7: equivalent to point 6 but already in the reduced domain
-    sim, pot = run(
-        r0=1.0,
-        rho=0.95,
-        theta=1.0471975511965979,
-        phi=2.0943951023931953,
-        alpha=5.445427266222309,
-        beta=2.0943951023931953,
-        gamma=0.0,
-    )
-    check_pair(
-        sim,
-        pot,
-        expected_energy=1.873,
-        expected_force=numpy.array([-0.073, -0.127, 0.084]),
-        expected_torque=numpy.array([-0.29, -0.261, 0.207]),
+    potential = potential_test.potential(
+        nlist=hoomd.md.nlist.Cell(buffer=1),
+        terms=terms,
+        coeffs=coeffs,
+        r0=r0_data,
+        r_cut=rc,
     )
+    integrator.forces = [potential]
+
-    # point 8: rho > 1, beyond surface cutoff - all zeros
-    sim, pot = run(
-        r0=1.0,
-        rho=1.05,
-        theta=numpy.pi / 4,
-        phi=numpy.pi / 6,
-        alpha=2 * numpy.pi / 5,
-        beta=numpy.pi / 2,
-        gamma=numpy.pi / 8,
-    )
-    check_zero_pair(sim, pot)
+    sim.operations.integrator = integrator
+    sim.run(0)
+
+    # Read the per-particle arrays on every rank, not only rank 0: under MPI
+    # these properties evaluate and gather collectively (see HOOMD Force docs).
+    energies = potential.energies
+    forces = potential.forces
+    torques = potential.torques
+
+    if sim.device.communicator.rank == 0:
+        if potential_test.zero_output:
+            numpy.testing.assert_allclose(energies, [0.0, 0.0], atol=1e-10)
+            numpy.testing.assert_allclose(
+                forces, [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], atol=1e-10
+            )
+            numpy.testing.assert_allclose(
+                torques, [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], atol=1e-10
+            )
+        else:
+            e = potential_test.energy
+            f = numpy.array(potential_test.force)
+            T = numpy.array(potential_test.torque)
+
+            # half of the pair energy is assigned to each particle
+            numpy.testing.assert_allclose(
+                energies, [0.5 * e, 0.5 * e], atol=1e-3, rtol=1e-3
+            )
+            # the reference force and torque are for particle 0; particle 1 is
+            # expected to carry the opposite vectors
+            numpy.testing.assert_allclose(forces, [f, -f], atol=1e-3, rtol=1e-3)
+            numpy.testing.assert_allclose(torques, [T, -T], atol=1e-3, rtol=1e-3)