From e9ececb389f12fbdb5ca7a8a20cd5dbbdcb397f1 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Fri, 27 Feb 2026 01:43:37 +0000 Subject: [PATCH 01/23] support for dpnp.scipy.linalg.lu() --- dpnp/scipy/linalg/__init__.py | 3 +- dpnp/scipy/linalg/_decomp_lu.py | 142 ++++++++++ dpnp/scipy/linalg/_utils.py | 284 ++++++++++++++++++++ dpnp/tests/test_linalg.py | 457 ++++++++++++++++++++++++++++++++ dpnp/tests/test_sycl_queue.py | 7 + dpnp/tests/test_usm_type.py | 7 + 6 files changed, 899 insertions(+), 1 deletion(-) diff --git a/dpnp/scipy/linalg/__init__.py b/dpnp/scipy/linalg/__init__.py index 3afc08a6fdb9..81eadd890fa9 100644 --- a/dpnp/scipy/linalg/__init__.py +++ b/dpnp/scipy/linalg/__init__.py @@ -35,9 +35,10 @@ """ -from ._decomp_lu import lu_factor, lu_solve +from ._decomp_lu import lu, lu_factor, lu_solve __all__ = [ + "lu", "lu_factor", "lu_solve", ] diff --git a/dpnp/scipy/linalg/_decomp_lu.py b/dpnp/scipy/linalg/_decomp_lu.py index 292d7fffe4b4..85161cec2c6f 100644 --- a/dpnp/scipy/linalg/_decomp_lu.py +++ b/dpnp/scipy/linalg/_decomp_lu.py @@ -46,11 +46,153 @@ ) from ._utils import ( + dpnp_lu, dpnp_lu_factor, dpnp_lu_solve, ) +def lu(a, permute_l=False, overwrite_a=False, check_finite=True, + p_indices=False): + """ + Compute LU decomposition of a matrix with partial pivoting. + + The decomposition satisfies:: + + A = P @ L @ U + + where `P` is a permutation matrix, `L` is lower triangular with unit + diagonal elements, and `U` is upper triangular. If `permute_l` is set to + ``True`` then `L` is returned already permuted and hence satisfying + ``A = L @ U``. + + For full documentation refer to :obj:`scipy.linalg.lu`. + + Parameters + ---------- + a : (..., M, N) {dpnp.ndarray, usm_ndarray} + Input array to decompose. + permute_l : bool, optional + Perform the multiplication ``P @ L`` (Default: do not permute). + + Default: ``False``. + overwrite_a : {None, bool}, optional + Whether to overwrite data in `a` (may increase performance). + + Default: ``False``. + check_finite : {None, bool}, optional + Whether to check that the input matrix contains only finite numbers. + Disabling may give a performance gain, but may result in problems + (crashes, non-termination) if the inputs do contain infinities or NaNs. + + Default: ``True``. + p_indices : bool, optional + If ``True`` the permutation information is returned as row indices + instead of a permutation matrix. + + Default: ``False``. + + Returns + ------- + **(If ``permute_l`` is ``False``)** + + p : (..., M, M) dpnp.ndarray or (..., M) dpnp.ndarray + If `p_indices` is ``False`` (default), the permutation matrix. + The permutation matrix always has a real dtype (``float32`` or + ``float64``) even when `a` is complex, since it only contains + 0s and 1s. + If `p_indices` is ``True``, a 1-D (or batched) array of row + permutation indices such that ``A = L[p] @ U``. + l : (..., M, K) dpnp.ndarray + Lower triangular or trapezoidal matrix with unit diagonal. + ``K = min(M, N)``. + u : (..., K, N) dpnp.ndarray + Upper triangular or trapezoidal matrix. + + **(If ``permute_l`` is ``True``)** + + pl : (..., M, K) dpnp.ndarray + Permuted ``L`` matrix: ``pl = P @ L``. + ``K = min(M, N)``. + u : (..., K, N) dpnp.ndarray + Upper triangular or trapezoidal matrix. + + Notes + ----- + Permutation matrices are costly since they are nothing but row reorder of + ``L`` and hence indices are strongly recommended to be used instead if the + permutation is required. The relation in the 2D case then becomes simply + ``A = L[P, :] @ U``. In higher dimensions, it is better to use `permute_l` + to avoid complicated indexing tricks. + + In the 2D case, if one has the indices however, for some reason, the + permutation matrix is still needed then it can be constructed by + ``dpnp.eye(M)[P, :]``. + + Warning + ------- + This function synchronizes in order to validate array elements + when ``check_finite=True``, and also synchronizes to compute the + permutation from LAPACK pivot indices. + + See Also + -------- + :obj:`dpnp.scipy.linalg.lu_factor` : LU factorize a matrix + (compact representation). + :obj:`dpnp.scipy.linalg.lu_solve` : Solve an equation system using + the LU factorization of a matrix. + + Examples + -------- + >>> import dpnp as np + >>> A = np.array([[2, 5, 8, 7], [5, 2, 2, 8], + ... [7, 5, 6, 6], [5, 4, 4, 8]]) + >>> p, l, u = np.scipy.linalg.lu(A) + >>> np.allclose(A, p @ l @ u) + array(True) + + Retrieve the permutation as row indices with ``p_indices=True``: + + >>> p, l, u = np.scipy.linalg.lu(A, p_indices=True) + >>> p + array([1, 3, 0, 2]) + >>> np.allclose(A, l[p] @ u) + array(True) + + Return the permuted ``L`` directly with ``permute_l=True``: + + >>> pl, u = np.scipy.linalg.lu(A, permute_l=True) + >>> np.allclose(A, pl @ u) + array(True) + + Non-square matrices are supported: + + >>> B = np.array([[1, 2, 3], [4, 5, 6]]) + >>> p, l, u = np.scipy.linalg.lu(B) + >>> np.allclose(B, p @ l @ u) + array(True) + + Batched input: + + >>> C = np.random.randn(3, 2, 4, 4) + >>> p, l, u = np.scipy.linalg.lu(C) + >>> np.allclose(C, p @ l @ u) + array(True) + + """ + + dpnp.check_supported_arrays_type(a) + assert_stacked_2d(a) + + return dpnp_lu( + a, + overwrite_a=overwrite_a, + check_finite=check_finite, + p_indices=p_indices, + permute_l=permute_l, + ) + + def lu_factor(a, overwrite_a=False, check_finite=True): """ Compute the pivoted LU decomposition of `a` matrix. diff --git a/dpnp/scipy/linalg/_utils.py b/dpnp/scipy/linalg/_utils.py index f00db6fdfb92..0f6df9289a54 100644 --- a/dpnp/scipy/linalg/_utils.py +++ b/dpnp/scipy/linalg/_utils.py @@ -83,6 +83,64 @@ def _align_lu_solve_broadcast(lu, b): return lu, b +def _apply_permutation_to_rows(mat, perm_indices): + """ + Apply a permutation to the rows (axis=-2) of a matrix. + + Returns ``out`` such that + ``out[..., i, :] = mat[..., perm_indices[..., i], :]``. + + For 2-D inputs this is equivalent to ``mat[perm_indices]`` (a single + device gather). For batched inputs :func:`dpnp.take_along_axis` is + used so the operation stays entirely on the device. + + Parameters + ---------- + mat : dpnp.ndarray, shape (..., M, N) + Matrix whose rows are to be permuted. + perm_indices : dpnp.ndarray, shape (..., M) + Permutation indices (dtype int64). + + Returns + ------- + out : dpnp.ndarray, shape (..., M, N) + Row-permuted matrix. + """ + + if perm_indices.ndim == 1: + # 2-D case: simple fancy indexing, single kernel launch. + return mat[perm_indices] + + # Batched case: ensure *mat* has the same batch dimensions as + # *perm_indices*. This is needed, for example, when permuting + # a shared identity matrix across a batch. + target_shape = perm_indices.shape[:-1] + mat.shape[-2:] + if mat.shape != target_shape: + mat = dpnp.broadcast_to(mat, target_shape) + + # Expand (..., M) → (..., M, 1), then broadcast to the full shape + # of *mat* so take_along_axis can gather along axis -2. + idx = dpnp.expand_dims(perm_indices, axis=-1) + idx = dpnp.broadcast_to(idx, target_shape).copy() + return dpnp.take_along_axis(mat, idx, axis=-2) + + +def _get_real_dtype(res_type): + """ + Return the real floating-point counterpart of *res_type*. + + SciPy uses the real dtype for the permutation matrix ``P`` even when + the input is complex (``P`` only contains 0s and 1s). + + ``float32`` and ``complex64`` → ``float32``; + ``float64`` and ``complex128`` → ``float64``. + """ + + if dpnp.issubdtype(res_type, dpnp.complexfloating): + return dpnp.float32 if dpnp.dtype(res_type).itemsize <= 8 else dpnp.float64 + return res_type + + def _batched_lu_factor_scipy(a, res_type): # pylint: disable=too-many-locals """SciPy-compatible LU factorization for batched inputs.""" @@ -338,6 +396,71 @@ def _map_trans_to_mkl(trans): raise ValueError("`trans` must be 0 (N), 1 (T), or 2 (C)") +def _pivots_to_permutation(piv, m): + """ + Convert 0-based LAPACK pivot indices (sequential row swaps) + to a permutation array. + + The returned permutation ``perm`` satisfies ``A[perm] = L @ U`` + (i.e. the forward row-permutation produced by LAPACK). + + The computation is performed entirely on the device. A host-side + Python loop of ``K = min(M, N)`` iterations drives the sequential + swap logic, but each iteration only launches device kernels + (:func:`dpnp.take_along_axis` for gather, + :func:`dpnp.put_along_axis` for scatter); **no data is transferred + between host and device**. + + .. note:: + + A future custom SYCL kernel could fuse all ``K`` swap steps + into a single launch to eliminate per-step kernel overhead. + + Parameters + ---------- + piv : dpnp.ndarray, shape (..., K) + 0-based pivot indices as returned by :obj:`dpnp_lu_factor`. + m : int + Number of rows of the original matrix. + + Returns + ------- + perm : dpnp.ndarray, shape (..., M), dtype int64 + Permutation indices. + """ + + batch_shape = piv.shape[:-1] + k = piv.shape[-1] + + # Initialise the identity permutation on the device. + perm = dpnp.broadcast_to( + dpnp.arange( + m, + dtype=dpnp.int64, + usm_type=piv.usm_type, + sycl_queue=piv.sycl_queue, + ), + (*batch_shape, m), + ).copy() + + # Apply sequential row swaps entirely on the device. + # Each iteration launches a small number of device kernels (gather + + # slice-assign + scatter) but never transfers data to the host. + for i in range(k): + # Pivot target for step *i*: shape (..., 1) + j = piv[..., i : i + 1] + + # Gather the two values to be swapped. + val_i = perm[..., i : i + 1].copy() # slice (free) + val_j = dpnp.take_along_axis(perm, j, axis=-1) # gather + + # Perform the swap. + perm[..., i : i + 1] = val_j # slice assign + dpnp.put_along_axis(perm, j, val_i, axis=-1) # scatter + + return perm + + def dpnp_lu_factor(a, overwrite_a=False, check_finite=True): """ dpnp_lu_factor(a, overwrite_a=False, check_finite=True) @@ -432,6 +555,167 @@ def dpnp_lu_factor(a, overwrite_a=False, check_finite=True): return (a_h, ipiv_h) +def dpnp_lu( + a, + overwrite_a=False, + check_finite=True, + p_indices=False, + permute_l=False, +): + """ + dpnp_lu(a, overwrite_a=False, check_finite=True, p_indices=False, + permute_l=False) + + Compute pivoted LU decomposition and return separate P, L, U matrices + (SciPy-compatible behavior). + + This function mimics the behavior of `scipy.linalg.lu` including + support for `permute_l`, `p_indices`, `overwrite_a`, and `check_finite`. + + """ + + a_sycl_queue = a.sycl_queue + a_usm_type = a.usm_type + m, n = a.shape[-2:] + k = min(m, n) + batch_shape = a.shape[:-2] + + res_type = _common_type(a) + + # The permutation matrix P uses a real dtype (SciPy convention): + # P only contains 0s and 1s, so complex storage would be wasteful. + real_type = _get_real_dtype(res_type) + + # ---- Fast path: scalar (1x1) matrices ---- + # For 1x1 input, P = I, L = I, U = A. This avoids invoking LAPACK + # entirely (matches SciPy's scalar fast path). + if m == 1 and n == 1: + if check_finite: + if not dpnp.isfinite(a).all(): + raise ValueError("array must not contain infs or NaNs") + + _L = dpnp.ones( + a.shape, + dtype=res_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + _U = dpnp.array(a, dtype=res_type) + + if permute_l: + return _L.copy(), _U + + if p_indices: + _p = dpnp.zeros( + (*batch_shape, 1), + dtype=dpnp.int64, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + return _p, _L, _U + + _P = dpnp.ones( + a.shape, + dtype=real_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + return _P, _L, _U + + # ---- Fast path: empty arrays ---- + if a.size == 0: + _L = dpnp.empty( + (*batch_shape, m, k), + dtype=res_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + _U = dpnp.empty( + (*batch_shape, k, n), + dtype=res_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + + if permute_l: + return _L, _U + + if p_indices: + _p = dpnp.empty( + (*batch_shape, m), + dtype=dpnp.int64, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + return _p, _L, _U + + _P = dpnp.empty( + (*batch_shape, m, m), + dtype=real_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + return _P, _L, _U + + # ---- General case: LAPACK factorization ---- + lu_compact, piv = dpnp_lu_factor( + a, overwrite_a=overwrite_a, check_finite=check_finite + ) + + # ---- Extract L: lower-triangular with unit diagonal ---- + # L has shape (..., M, K). + _L = dpnp.tril(lu_compact[..., :, :k], k=-1) + _L += dpnp.eye( + m, + k, + dtype=lu_compact.dtype, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + + # ---- Extract U: upper-triangular ---- + # U has shape (..., K, N). + _U = dpnp.triu(lu_compact[..., :k, :]) + + # ---- Convert pivot indices → row permutation ---- + # ``perm`` (forward): A[perm] = L @ U. + # This is the only step that requires a host transfer because the + # sequential swap semantics of LAPACK pivots cannot be parallelised. + # Only the small pivot array (min(M, N) elements per slice) is + # transferred; all subsequent work stays on the device. + perm = _pivots_to_permutation(piv, m) + + # ``inv_perm`` (inverse): A = L[inv_perm] @ U. + # This is SciPy's ``p_indices`` convention. + # ``dpnp.argsort`` is an efficient on-device O(M log M) operation + # that avoids a second host round-trip. + inv_perm = dpnp.argsort(perm, axis=-1).astype(dpnp.int64) + + if permute_l: + # Return (PL, U) where PL = P @ L = L[inv_perm]. + # A = PL @ U directly. + _PL = _apply_permutation_to_rows(_L, inv_perm) + return _PL, _U + + if p_indices: + # SciPy convention: A = L[inv_perm] @ U. + return inv_perm, _L, _U + + # ---- Build full permutation matrix P = I[inv_perm] ---- + # P has shape (..., M, M) with real dtype (SciPy convention). + # The gather from an identity matrix is efficient on device: + # each output row selects one row of the identity (one hot encoding). + _I = dpnp.eye( + m, + dtype=real_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + _P = _apply_permutation_to_rows(_I, inv_perm) + + return _P, _L, _U + + def dpnp_lu_solve(lu, piv, b, trans=0, overwrite_b=False, check_finite=True): """ dpnp_lu_solve(lu, piv, b, trans=0, overwrite_b=False, check_finite=True) diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py index 31d99d71ce49..e92b9f65c182 100644 --- a/dpnp/tests/test_linalg.py +++ b/dpnp/tests/test_linalg.py @@ -2605,6 +2605,463 @@ def test_invalid_shapes(self, a_shape, b_shape): dpnp.scipy.linalg.lu_solve((lu, piv), b, check_finite=False) +class TestLu: + @staticmethod + def _make_nonsingular_np(shape, dtype, order): + A = generate_random_numpy_array(shape, dtype, order) + m, n = shape + k = min(m, n) + for i in range(k): + off = numpy.sum(numpy.abs(A[i, :n])) - numpy.abs(A[i, i]) + A[i, i] = A.dtype.type(off + 1.0) + return A + + @pytest.mark.parametrize( + "shape", + [(1, 1), (2, 2), (3, 3), (1, 5), (5, 1), (2, 5), (5, 2)], + ) + @pytest.mark.parametrize("order", ["C", "F"]) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_lu_default(self, shape, order, dtype): + a_np = self._make_nonsingular_np(shape, dtype, order) + a_dp = dpnp.array(a_np, order=order) + + P, L, U = dpnp.scipy.linalg.lu(a_dp) + + m, n = shape + k = min(m, n) + assert P.shape == (m, m) + assert L.shape == (m, k) + assert U.shape == (k, n) + + A_cast = a_dp.astype(L.dtype, copy=False) + A_rec = P @ L @ U + assert dpnp.allclose(A_rec, A_cast, rtol=1e-6, atol=1e-6) + + @pytest.mark.parametrize( + "shape", + [(1, 1), (2, 2), (3, 3), (1, 5), (5, 1), (2, 5), (5, 2)], + ) + @pytest.mark.parametrize("order", ["C", "F"]) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_lu_permute_l(self, shape, order, dtype): + a_np = self._make_nonsingular_np(shape, dtype, order) + a_dp = dpnp.array(a_np, order=order) + + PL, U = dpnp.scipy.linalg.lu(a_dp, permute_l=True) + + m, n = shape + k = min(m, n) + assert PL.shape == (m, k) + assert U.shape == (k, n) + + A_cast = a_dp.astype(PL.dtype, copy=False) + A_rec = PL @ U + assert dpnp.allclose(A_rec, A_cast, rtol=1e-6, atol=1e-6) + + @pytest.mark.parametrize( + "shape", + [(1, 1), (2, 2), (3, 3), (1, 5), (5, 1), (2, 5), (5, 2)], + ) + @pytest.mark.parametrize("order", ["C", "F"]) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_lu_p_indices(self, shape, order, dtype): + a_np = self._make_nonsingular_np(shape, dtype, order) + a_dp = dpnp.array(a_np, order=order) + + p, L, U = dpnp.scipy.linalg.lu(a_dp, p_indices=True) + + m, n = shape + k = min(m, n) + assert p.shape == (m,) + assert L.shape == (m, k) + assert U.shape == (k, n) + assert dpnp.issubdtype(p.dtype, dpnp.integer) + + p_np = dpnp.asnumpy(p) + L_np = dpnp.asnumpy(L) + U_np = dpnp.asnumpy(U) + A_rec = L_np[p_np] @ U_np + A_cast = a_dp.astype(L.dtype, copy=False) + assert_allclose(A_rec, dpnp.asnumpy(A_cast), rtol=1e-6, atol=1e-6) + + @pytest.mark.parametrize( + "in_dtype, expected_p_dtype", + [ + (dpnp.float32, dpnp.float32), + (dpnp.float64, dpnp.float64), + (dpnp.complex64, dpnp.float32), + (dpnp.complex128, dpnp.float64), + ], + ) + def test_p_matrix_dtype(self, in_dtype, expected_p_dtype): + if in_dtype in (dpnp.float64, dpnp.complex128): + if not has_support_aspect64(): + pytest.skip("fp64 not supported on this device") + + a_np = self._make_nonsingular_np((4, 4), in_dtype, "F") + a_dp = dpnp.array(a_np, order="F") + P, L, U = dpnp.scipy.linalg.lu(a_dp) + + assert P.dtype == expected_p_dtype + assert dpnp.issubdtype(P.dtype, dpnp.floating) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_p_indices_dtype(self, dtype): + a_np = self._make_nonsingular_np((4, 4), dtype, "F") + a_dp = dpnp.array(a_np, order="F") + p, _, _ = dpnp.scipy.linalg.lu(a_dp, p_indices=True) + assert dpnp.issubdtype(p.dtype, dpnp.integer) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_l_structure(self, dtype): + a_np = self._make_nonsingular_np((5, 5), dtype, "F") + a_dp = dpnp.array(a_np, order="F") + _, L, _ = dpnp.scipy.linalg.lu(a_dp) + L_np = dpnp.asnumpy(L) + + # unit diagonal + diag_abs = numpy.abs(numpy.diag(L_np)) + assert_allclose(diag_abs, numpy.ones(5, dtype=diag_abs.dtype)) + # lower triangular + assert_allclose(numpy.triu(L_np, 1), numpy.zeros_like(L_np)) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_u_upper_triangular(self, dtype): + a_np = self._make_nonsingular_np((5, 5), dtype, "F") + a_dp = dpnp.array(a_np, order="F") + _, _, U = dpnp.scipy.linalg.lu(a_dp) + U_np = dpnp.asnumpy(U) + assert_allclose(numpy.tril(U_np, -1), numpy.zeros_like(U_np)) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_p_is_permutation(self, dtype): + a_np = self._make_nonsingular_np((5, 5), dtype, "F") + a_dp = dpnp.array(a_np, order="F") + P, _, _ = dpnp.scipy.linalg.lu(a_dp) + P_np = dpnp.asnumpy(P) + + assert_allclose(P_np.sum(axis=0), numpy.ones(5, dtype=P_np.dtype)) + assert_allclose(P_np.sum(axis=1), numpy.ones(5, dtype=P_np.dtype)) + assert_allclose(P_np.T @ P_np, numpy.eye(5, dtype=P_np.dtype), atol=1e-15) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_modes_consistency(self, dtype): + a_np = self._make_nonsingular_np((5, 5), dtype, "F") + a_dp = dpnp.array(a_np, order="F") + + P, L, U = dpnp.scipy.linalg.lu(a_dp) + PL, U2 = dpnp.scipy.linalg.lu(a_dp, permute_l=True) + p, L3, U3 = dpnp.scipy.linalg.lu(a_dp, p_indices=True) + + A_cast = a_dp.astype(L.dtype, copy=False) + A1 = P @ L @ U + A2 = PL @ U2 + p_np = dpnp.asnumpy(p) + A3_np = dpnp.asnumpy(L3)[p_np] @ dpnp.asnumpy(U3) + + assert dpnp.allclose(A1, A_cast, rtol=1e-6, atol=1e-6) + assert dpnp.allclose(A2, A_cast, rtol=1e-6, atol=1e-6) + assert_allclose(A3_np, dpnp.asnumpy(A_cast), rtol=1e-6, atol=1e-6) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_p_times_l_equals_pl(self, dtype): + a_np = self._make_nonsingular_np((5, 5), dtype, "F") + a_dp = dpnp.array(a_np, order="F") + P, L, _ = dpnp.scipy.linalg.lu(a_dp) + PL, _ = dpnp.scipy.linalg.lu(a_dp, permute_l=True) + assert dpnp.allclose(P @ L, PL, rtol=1e-12, atol=1e-12) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_p_indices_to_matrix(self, dtype): + a_np = self._make_nonsingular_np((5, 5), dtype, "F") + a_dp = dpnp.array(a_np, order="F") + P, _, _ = dpnp.scipy.linalg.lu(a_dp) + p, _, _ = dpnp.scipy.linalg.lu(a_dp, p_indices=True) + P_from_idx = dpnp.eye(5, dtype=P.dtype)[p] + assert dpnp.allclose(P_from_idx, P, rtol=1e-15, atol=1e-15) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_overwrite_a_false(self, dtype): + a_dp = dpnp.array([[4, 3], [6, 3]], dtype=dtype, order="F") + a_dp_orig = a_dp.copy() + dpnp.scipy.linalg.lu(a_dp, overwrite_a=False) + assert dpnp.allclose(a_dp, a_dp_orig) + + @pytest.mark.parametrize("shape", [(0, 0), (0, 2), (2, 0)]) + def test_empty_inputs(self, shape): + a_dp = dpnp.empty(shape, dtype=dpnp.default_float_type(), order="F") + P, L, U = dpnp.scipy.linalg.lu(a_dp) + m, n = shape + k = min(m, n) + assert P.shape == (m, m) + assert L.shape == (m, k) + assert U.shape == (k, n) + + @pytest.mark.parametrize("shape", [(0, 0), (0, 2), (2, 0)]) + def test_empty_permute_l(self, shape): + a_dp = dpnp.empty(shape, dtype=dpnp.default_float_type(), order="F") + PL, U = dpnp.scipy.linalg.lu(a_dp, permute_l=True) + m, n = shape + k = min(m, n) + assert PL.shape == (m, k) + assert U.shape == (k, n) + + @pytest.mark.parametrize("shape", [(0, 0), (0, 2), (2, 0)]) + def test_empty_p_indices(self, shape): + a_dp = dpnp.empty(shape, dtype=dpnp.default_float_type(), order="F") + p, L, U = dpnp.scipy.linalg.lu(a_dp, p_indices=True) + m, n = shape + k = min(m, n) + assert p.shape == (m,) + assert L.shape == (m, k) + assert U.shape == (k, n) + + @pytest.mark.parametrize( + "sl", + [ + (slice(None, None, 2), slice(None, None, 2)), + (slice(None, None, -1), slice(None, None, -1)), + ], + ) + def test_strided(self, sl): + base = self._make_nonsingular_np( + (7, 7), dpnp.default_float_type(), "F" + ) + a_np = base[sl] + a_dp = dpnp.array(a_np) + + P, L, U = dpnp.scipy.linalg.lu(a_dp) + A_rec = P @ L @ U + assert dpnp.allclose(A_rec, a_dp, rtol=1e-6, atol=1e-6) + + def test_singular_matrix(self): + a_np = numpy.array([[1.0, 2.0], [2.0, 4.0]]) + a_dp = dpnp.array(a_np) + P, L, U = dpnp.scipy.linalg.lu(a_dp) + A_rec = dpnp.asnumpy(P @ L @ U) + assert_allclose(A_rec, a_np, atol=1e-12) + + def test_identity_matrix(self): + n = 4 + I_dp = dpnp.eye(n, dtype=dpnp.default_float_type()) + P, L, U = dpnp.scipy.linalg.lu(I_dp) + I_np = numpy.eye(n) + assert_allclose(dpnp.asnumpy(P), I_np, atol=1e-15) + assert_allclose(dpnp.asnumpy(L), I_np, atol=1e-15) + assert_allclose(dpnp.asnumpy(U), I_np, atol=1e-15) + + def test_1d_input_raises(self): + a_dp = dpnp.array([1.0, 2.0, 3.0]) + with pytest.raises(ValueError): + dpnp.scipy.linalg.lu(a_dp) + + @pytest.mark.parametrize("bad", [numpy.inf, -numpy.inf, numpy.nan]) + def test_check_finite_raises(self, bad): + a_dp = dpnp.array([[1.0, 2.0], [3.0, bad]], order="F") + assert_raises( + ValueError, dpnp.scipy.linalg.lu, a_dp, check_finite=True + ) + + def test_check_finite_disabled(self): + a_dp = dpnp.array([[1.0, numpy.nan], [3.0, 4.0]]) + result = dpnp.scipy.linalg.lu(a_dp, check_finite=False) + assert len(result) == 3 + + +class TestLuBatched: + @staticmethod + def _make_nonsingular_nd_np(shape, dtype, order): + A = generate_random_numpy_array(shape, dtype, order) + m, n = shape[-2], shape[-1] + k = min(m, n) + A3 = A.reshape((-1, m, n)) + for B in A3: + for i in range(k): + off = numpy.sum(numpy.abs(B[i, :n])) - numpy.abs(B[i, i]) + B[i, i] = A.dtype.type(off + 1.0) + A = A3.reshape(shape) + A = numpy.array(A, order=order) + return A + + @staticmethod + def _reconstruct_p_indices(p, L, U): + """Reconstruct A from (p, L, U) for batched p_indices mode.""" + idx = dpnp.expand_dims(p, axis=-1) + idx = dpnp.broadcast_to(idx, L.shape).copy() + PL = dpnp.take_along_axis(L, idx, axis=-2) + return PL @ U + + @pytest.mark.parametrize( + "shape", + [(2, 2, 2), (3, 4, 4), (2, 3, 5, 2), (4, 1, 3)], + ids=["(2,2,2)", "(3,4,4)", "(2,3,5,2)", "(4,1,3)"], + ) + @pytest.mark.parametrize("order", ["C", "F"]) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_lu_default_batched(self, shape, order, dtype): + a_np = self._make_nonsingular_nd_np(shape, dtype, order) + a_dp = dpnp.array(a_np, order=order) + + P, L, U = dpnp.scipy.linalg.lu(a_dp) + + m, n = shape[-2], shape[-1] + k = min(m, n) + assert P.shape == (*shape[:-2], m, m) + assert L.shape == (*shape[:-2], m, k) + assert U.shape == (*shape[:-2], k, n) + + A_cast = a_dp.astype(L.dtype, copy=False) + A_rec = P @ L @ U + assert dpnp.allclose(A_rec, A_cast, rtol=1e-6, atol=1e-6) + + @pytest.mark.parametrize( + "shape", + [(2, 2, 2), (3, 4, 4), (2, 3, 5, 2), (4, 1, 3)], + ids=["(2,2,2)", "(3,4,4)", "(2,3,5,2)", "(4,1,3)"], + ) + @pytest.mark.parametrize("order", ["C", "F"]) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_lu_permute_l_batched(self, shape, order, dtype): + a_np = self._make_nonsingular_nd_np(shape, dtype, order) + a_dp = dpnp.array(a_np, order=order) + + PL, U = dpnp.scipy.linalg.lu(a_dp, permute_l=True) + + m, n = shape[-2], shape[-1] + k = min(m, n) + assert PL.shape == (*shape[:-2], m, k) + assert U.shape == (*shape[:-2], k, n) + + A_cast = a_dp.astype(PL.dtype, copy=False) + A_rec = PL @ U + assert dpnp.allclose(A_rec, A_cast, rtol=1e-6, atol=1e-6) + + @pytest.mark.parametrize( + "shape", + [(2, 2, 2), (3, 4, 4), (2, 3, 5, 2), (4, 1, 3)], + ids=["(2,2,2)", "(3,4,4)", "(2,3,5,2)", "(4,1,3)"], + ) + @pytest.mark.parametrize("order", ["C", "F"]) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_lu_p_indices_batched(self, shape, order, dtype): + a_np = self._make_nonsingular_nd_np(shape, dtype, order) + a_dp = dpnp.array(a_np, order=order) + + p, L, U = dpnp.scipy.linalg.lu(a_dp, p_indices=True) + + m, n = shape[-2], shape[-1] + k = min(m, n) + assert p.shape == (*shape[:-2], m) + assert L.shape == (*shape[:-2], m, k) + assert U.shape == (*shape[:-2], k, n) + assert dpnp.issubdtype(p.dtype, dpnp.integer) + + A_cast = a_dp.astype(L.dtype, copy=False) + A_rec = self._reconstruct_p_indices(p, L, U) + assert dpnp.allclose(A_rec, A_cast, rtol=1e-6, atol=1e-6) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + @pytest.mark.parametrize("order", ["C", "F"]) + def test_overwrite_a(self, dtype, order): + a_np = self._make_nonsingular_nd_np((3, 2, 2), dtype, order) + a_dp = dpnp.array(a_np, order=order) + a_dp_orig = a_dp.copy() + + dpnp.scipy.linalg.lu(a_dp, overwrite_a=False) + assert dpnp.allclose(a_dp, a_dp_orig) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_modes_consistency_batched(self, dtype): + a_np = self._make_nonsingular_nd_np((3, 4, 4), dtype, "F") + a_dp = dpnp.array(a_np, order="F") + A_cast = a_dp.astype( + dpnp.complex128 if dpnp.issubdtype(dtype, dpnp.complexfloating) + else dpnp.float64, copy=False + ) + + P, L, U = dpnp.scipy.linalg.lu(a_dp) + PL, U2 = dpnp.scipy.linalg.lu(a_dp, permute_l=True) + p, L3, U3 = dpnp.scipy.linalg.lu(a_dp, p_indices=True) + + A1 = P @ L @ U + A2 = PL @ U2 + A3 = self._reconstruct_p_indices(p, L3, U3) + + A_cast2 = a_dp.astype(L.dtype, copy=False) + assert dpnp.allclose(A1, A_cast2, rtol=1e-6, atol=1e-6) + assert dpnp.allclose(A2, A_cast2, rtol=1e-6, atol=1e-6) + assert dpnp.allclose(A3, A_cast2, rtol=1e-6, atol=1e-6) + + @pytest.mark.parametrize( + "shape", [(0, 2, 2), (2, 0, 2), (2, 2, 0), (0, 0, 0)] + ) + def test_empty_inputs(self, shape): + a = dpnp.empty(shape, dtype=dpnp.default_float_type(), order="F") + + P, L, U = dpnp.scipy.linalg.lu(a) + m, n = shape[-2:] + k = min(m, n) + assert P.shape == (*shape[:-2], m, m) + assert L.shape == (*shape[:-2], m, k) + assert U.shape == (*shape[:-2], k, n) + + @pytest.mark.parametrize( + "shape", [(0, 2, 2), (2, 0, 2), (2, 2, 0), (0, 0, 0)] + ) + def test_empty_permute_l(self, shape): + a = dpnp.empty(shape, dtype=dpnp.default_float_type(), order="F") + + PL, U = dpnp.scipy.linalg.lu(a, permute_l=True) + m, n = shape[-2:] + k = min(m, n) + assert PL.shape == (*shape[:-2], m, k) + assert U.shape == (*shape[:-2], k, n) + + @pytest.mark.parametrize( + "shape", [(0, 2, 2), (2, 0, 2), (2, 2, 0), (0, 0, 0)] + ) + def test_empty_p_indices(self, shape): + a = dpnp.empty(shape, dtype=dpnp.default_float_type(), order="F") + + p, L, U = dpnp.scipy.linalg.lu(a, p_indices=True) + m, n = shape[-2:] + k = min(m, n) + assert p.shape == (*shape[:-2], m) + assert L.shape == (*shape[:-2], m, k) + assert U.shape == (*shape[:-2], k, n) + + def test_strided(self): + a_np = self._make_nonsingular_nd_np( + (5, 3, 3), dpnp.default_float_type(), "F" + ) + a_dp = dpnp.array(a_np, order="F") + a_stride = a_dp[::2] + + P, L, U = dpnp.scipy.linalg.lu(a_stride) + for i in range(a_stride.shape[0]): + A_rec = dpnp.asnumpy(P[i] @ L[i] @ U[i]) + A_orig = dpnp.asnumpy(a_stride[i].astype(L.dtype, copy=False)) + assert_allclose(A_rec, A_orig, rtol=1e-6, atol=1e-6) + + def test_singular_matrix(self): + a = dpnp.zeros((3, 2, 2), dtype=dpnp.default_float_type()) + a[0] = dpnp.array([[1.0, 2.0], [2.0, 4.0]]) + a[1] = dpnp.eye(2) + a[2] = dpnp.array([[1.0, 1.0], [1.0, 1.0]]) + + P, L, U = dpnp.scipy.linalg.lu(a) + A_rec = P @ L @ U + assert dpnp.allclose(A_rec, a, rtol=1e-6, atol=1e-6) + + def test_check_finite_raises(self): + a = dpnp.ones((2, 3, 3), dtype=dpnp.default_float_type(), order="F") + a[1, 0, 0] = dpnp.nan + assert_raises( + ValueError, dpnp.scipy.linalg.lu, a, check_finite=True + ) + + class TestMatrixPower: @pytest.mark.parametrize("dtype", get_all_dtypes()) @pytest.mark.parametrize( diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py index d1853579036a..e392f4b733c4 100644 --- a/dpnp/tests/test_sycl_queue.py +++ b/dpnp/tests/test_sycl_queue.py @@ -1675,6 +1675,13 @@ def test_lu_factor(self, data, device): ([[[1.0, 2.0], [3.0, 5.0]]], numpy.empty((2, 0, 2))), ], ) + def test_lu(self, data, device): + a = dpnp.array(data, device=device) + result = dpnp.scipy.linalg.lu(a) + + for param in result: + param_queue = param.sycl_queue + assert_sycl_queue_equal(param_queue, a.sycl_queue) def test_lu_solve(self, a_data, b_data, device): a = dpnp.array(a_data, device=device) lu, piv = dpnp.scipy.linalg.lu_factor(a) diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py index 4fc0f2b958fa..6eecbb243edc 100644 --- a/dpnp/tests/test_usm_type.py +++ b/dpnp/tests/test_usm_type.py @@ -1531,6 +1531,13 @@ def test_lstsq(self, m, n, nrhs, usm_type, usm_type_other): "data", [[[1.0, 2.0], [3.0, 5.0]], [[]], [[[1.0, 2.0], [3.0, 5.0]]], [[[]]]], ) + def test_lu(self, data, usm_type): + a = dpnp.array(data, usm_type=usm_type) + result = dpnp.scipy.linalg.lu(a) + + assert a.usm_type == usm_type + for param in result: + assert param.usm_type == a.usm_type def test_lu_factor(self, data, usm_type): a = dpnp.array(data, usm_type=usm_type) result = dpnp.scipy.linalg.lu_factor(a) From 206f4ee83b121f05fcfc0afe1d48833fd8a1eb91 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Fri, 27 Feb 2026 20:50:17 +0000 Subject: [PATCH 02/23] Fix the test in test_sycl_queue.py --- dpnp/tests/test_sycl_queue.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py index e392f4b733c4..77d659305c06 100644 --- a/dpnp/tests/test_sycl_queue.py +++ b/dpnp/tests/test_sycl_queue.py @@ -1662,6 +1662,13 @@ def test_lu_factor(self, data, device): a = dpnp.array(data, device=device) result = dpnp.scipy.linalg.lu_factor(a) + for param in result: + param_queue = param.sycl_queue + assert_sycl_queue_equal(param_queue, a.sycl_queue) + def test_lu(self, data, device): + a = dpnp.array(data, device=device) + result = dpnp.scipy.linalg.lu(a) + for param in result: param_queue = param.sycl_queue assert_sycl_queue_equal(param_queue, a.sycl_queue) @@ -1675,13 +1682,6 @@ def test_lu_factor(self, data, device): ([[[1.0, 2.0], [3.0, 5.0]]], numpy.empty((2, 0, 2))), ], ) - def test_lu(self, data, device): - a = dpnp.array(data, device=device) - result = dpnp.scipy.linalg.lu(a) - - for param in result: - param_queue = param.sycl_queue - assert_sycl_queue_equal(param_queue, a.sycl_queue) def test_lu_solve(self, a_data, b_data, device): a = dpnp.array(a_data, device=device) lu, piv = dpnp.scipy.linalg.lu_factor(a) From 7a6b5bd4d37099e338eb245aef84f79f98a13126 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Sat, 28 Feb 2026 16:31:04 +0000 Subject: [PATCH 03/23] Fix black formatting --- dpnp/scipy/linalg/_decomp_lu.py | 5 +++-- dpnp/scipy/linalg/_utils.py | 12 +++++++----- dpnp/tests/test_linalg.py | 24 ++++++++++++------------ dpnp/tests/test_sycl_queue.py | 1 + dpnp/tests/test_usm_type.py | 1 + 5 files changed, 24 insertions(+), 19 deletions(-) diff --git a/dpnp/scipy/linalg/_decomp_lu.py b/dpnp/scipy/linalg/_decomp_lu.py index 85161cec2c6f..c890bb8cc090 100644 --- a/dpnp/scipy/linalg/_decomp_lu.py +++ b/dpnp/scipy/linalg/_decomp_lu.py @@ -52,8 +52,9 @@ ) -def lu(a, permute_l=False, overwrite_a=False, check_finite=True, - p_indices=False): +def lu( + a, permute_l=False, overwrite_a=False, check_finite=True, p_indices=False +): """ Compute LU decomposition of a matrix with partial pivoting. diff --git a/dpnp/scipy/linalg/_utils.py b/dpnp/scipy/linalg/_utils.py index 0f6df9289a54..1a62e8da7212 100644 --- a/dpnp/scipy/linalg/_utils.py +++ b/dpnp/scipy/linalg/_utils.py @@ -137,7 +137,9 @@ def _get_real_dtype(res_type): """ if dpnp.issubdtype(res_type, dpnp.complexfloating): - return dpnp.float32 if dpnp.dtype(res_type).itemsize <= 8 else dpnp.float64 + return ( + dpnp.float32 if dpnp.dtype(res_type).itemsize <= 8 else dpnp.float64 + ) return res_type @@ -451,12 +453,12 @@ def _pivots_to_permutation(piv, m): j = piv[..., i : i + 1] # Gather the two values to be swapped. - val_i = perm[..., i : i + 1].copy() # slice (free) - val_j = dpnp.take_along_axis(perm, j, axis=-1) # gather + val_i = perm[..., i : i + 1].copy() # slice (free) + val_j = dpnp.take_along_axis(perm, j, axis=-1) # gather # Perform the swap. - perm[..., i : i + 1] = val_j # slice assign - dpnp.put_along_axis(perm, j, val_i, axis=-1) # scatter + perm[..., i : i + 1] = val_j # slice assign + dpnp.put_along_axis(perm, j, val_i, axis=-1) # scatter return perm diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py index e92b9f65c182..39ddff426973 100644 --- a/dpnp/tests/test_linalg.py +++ b/dpnp/tests/test_linalg.py @@ -2743,7 +2743,9 @@ def test_p_is_permutation(self, dtype): assert_allclose(P_np.sum(axis=0), numpy.ones(5, dtype=P_np.dtype)) assert_allclose(P_np.sum(axis=1), numpy.ones(5, dtype=P_np.dtype)) - assert_allclose(P_np.T @ P_np, numpy.eye(5, dtype=P_np.dtype), atol=1e-15) + assert_allclose( + P_np.T @ P_np, numpy.eye(5, dtype=P_np.dtype), atol=1e-15 + ) @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) def test_modes_consistency(self, dtype): @@ -2825,9 +2827,7 @@ def test_empty_p_indices(self, shape): ], ) def test_strided(self, sl): - base = self._make_nonsingular_np( - (7, 7), dpnp.default_float_type(), "F" - ) + base = self._make_nonsingular_np((7, 7), dpnp.default_float_type(), "F") a_np = base[sl] a_dp = dpnp.array(a_np) @@ -2859,9 +2859,7 @@ def test_1d_input_raises(self): @pytest.mark.parametrize("bad", [numpy.inf, -numpy.inf, numpy.nan]) def test_check_finite_raises(self, bad): a_dp = dpnp.array([[1.0, 2.0], [3.0, bad]], order="F") - assert_raises( - ValueError, dpnp.scipy.linalg.lu, a_dp, check_finite=True - ) + assert_raises(ValueError, dpnp.scipy.linalg.lu, a_dp, check_finite=True) def test_check_finite_disabled(self): a_dp = dpnp.array([[1.0, numpy.nan], [3.0, 4.0]]) @@ -2976,8 +2974,12 @@ def test_modes_consistency_batched(self, dtype): a_np = self._make_nonsingular_nd_np((3, 4, 4), dtype, "F") a_dp = dpnp.array(a_np, order="F") A_cast = a_dp.astype( - dpnp.complex128 if dpnp.issubdtype(dtype, dpnp.complexfloating) - else dpnp.float64, copy=False + ( + dpnp.complex128 + if dpnp.issubdtype(dtype, dpnp.complexfloating) + else dpnp.float64 + ), + copy=False, ) P, L, U = dpnp.scipy.linalg.lu(a_dp) @@ -3057,9 +3059,7 @@ def test_singular_matrix(self): def test_check_finite_raises(self): a = dpnp.ones((2, 3, 3), dtype=dpnp.default_float_type(), order="F") a[1, 0, 0] = dpnp.nan - assert_raises( - ValueError, dpnp.scipy.linalg.lu, a, check_finite=True - ) + assert_raises(ValueError, dpnp.scipy.linalg.lu, a, check_finite=True) class TestMatrixPower: diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py index 77d659305c06..b0155658a4df 100644 --- a/dpnp/tests/test_sycl_queue.py +++ b/dpnp/tests/test_sycl_queue.py @@ -1665,6 +1665,7 @@ def test_lu_factor(self, data, device): for param in result: param_queue = param.sycl_queue assert_sycl_queue_equal(param_queue, a.sycl_queue) + def test_lu(self, data, device): a = dpnp.array(data, device=device) result = dpnp.scipy.linalg.lu(a) diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py index 6eecbb243edc..59a6b05f688e 100644 --- a/dpnp/tests/test_usm_type.py +++ b/dpnp/tests/test_usm_type.py @@ -1538,6 +1538,7 @@ def test_lu(self, data, usm_type): assert a.usm_type == usm_type for param in result: assert param.usm_type == a.usm_type + def test_lu_factor(self, data, usm_type): a = dpnp.array(data, usm_type=usm_type) result = dpnp.scipy.linalg.lu_factor(a) From 1f6c92849e72ff3c1165844ead66ea3d85efecda Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Sun, 1 Mar 2026 19:59:58 +0000 Subject: [PATCH 04/23] fix formatting and apply plint --- dpnp/scipy/linalg/_decomp_lu.py | 10 +-- dpnp/scipy/linalg/_utils.py | 126 +++++++++++++------------------- 2 files changed, 55 insertions(+), 81 deletions(-) diff --git a/dpnp/scipy/linalg/_decomp_lu.py b/dpnp/scipy/linalg/_decomp_lu.py index c890bb8cc090..823b2fccc230 100644 --- a/dpnp/scipy/linalg/_decomp_lu.py +++ b/dpnp/scipy/linalg/_decomp_lu.py @@ -323,13 +323,13 @@ def lu_solve(lu_and_piv, b, trans=0, overwrite_b=False, check_finite=True): """ - lu, piv = lu_and_piv - dpnp.check_supported_arrays_type(lu, piv, b) - assert_stacked_2d(lu) - assert_stacked_square(lu) + lu_matrix, piv = lu_and_piv + dpnp.check_supported_arrays_type(lu_matrix, piv, b) + assert_stacked_2d(lu_matrix) + assert_stacked_square(lu_matrix) return dpnp_lu_solve( - lu, + lu_matrix, piv, b, trans=trans, diff --git a/dpnp/scipy/linalg/_utils.py b/dpnp/scipy/linalg/_utils.py index 1a62e8da7212..76e0db6752b0 100644 --- a/dpnp/scipy/linalg/_utils.py +++ b/dpnp/scipy/linalg/_utils.py @@ -596,126 +596,100 @@ def dpnp_lu( if not dpnp.isfinite(a).all(): raise ValueError("array must not contain infs or NaNs") - _L = dpnp.ones( + low = dpnp.ones( a.shape, dtype=res_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) - _U = dpnp.array(a, dtype=res_type) - - if permute_l: - return _L.copy(), _U - - if p_indices: - _p = dpnp.zeros( - (*batch_shape, 1), - dtype=dpnp.int64, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) - return _p, _L, _U - - _P = dpnp.ones( - a.shape, - dtype=real_type, + up = dpnp.array(a, dtype=res_type) + inv_perm = dpnp.zeros( + (*batch_shape, 1), + dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) - return _P, _L, _U # ---- Fast path: empty arrays ---- - if a.size == 0: - _L = dpnp.empty( + elif a.size == 0: + low = dpnp.empty( (*batch_shape, m, k), dtype=res_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) - _U = dpnp.empty( + up = dpnp.empty( (*batch_shape, k, n), dtype=res_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) - - if permute_l: - return _L, _U - - if p_indices: - _p = dpnp.empty( - (*batch_shape, m), - dtype=dpnp.int64, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) - return _p, _L, _U - - _P = dpnp.empty( - (*batch_shape, m, m), - dtype=real_type, + inv_perm = dpnp.empty( + (*batch_shape, m), + dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) - return _P, _L, _U # ---- General case: LAPACK factorization ---- - lu_compact, piv = dpnp_lu_factor( - a, overwrite_a=overwrite_a, check_finite=check_finite - ) - - # ---- Extract L: lower-triangular with unit diagonal ---- - # L has shape (..., M, K). - _L = dpnp.tril(lu_compact[..., :, :k], k=-1) - _L += dpnp.eye( - m, - k, - dtype=lu_compact.dtype, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) - - # ---- Extract U: upper-triangular ---- - # U has shape (..., K, N). - _U = dpnp.triu(lu_compact[..., :k, :]) - - # ---- Convert pivot indices → row permutation ---- - # ``perm`` (forward): A[perm] = L @ U. - # This is the only step that requires a host transfer because the - # sequential swap semantics of LAPACK pivots cannot be parallelised. - # Only the small pivot array (min(M, N) elements per slice) is - # transferred; all subsequent work stays on the device. - perm = _pivots_to_permutation(piv, m) + else: + lu_compact, piv = dpnp_lu_factor( + a, overwrite_a=overwrite_a, check_finite=check_finite + ) - # ``inv_perm`` (inverse): A = L[inv_perm] @ U. - # This is SciPy's ``p_indices`` convention. - # ``dpnp.argsort`` is an efficient on-device O(M log M) operation - # that avoids a second host round-trip. - inv_perm = dpnp.argsort(perm, axis=-1).astype(dpnp.int64) + # ---- Extract L: lower-triangular with unit diagonal ---- + # L has shape (..., M, K). + low = dpnp.tril(lu_compact[..., :, :k], k=-1) + low += dpnp.eye( + m, + k, + dtype=lu_compact.dtype, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + # ---- Extract U: upper-triangular ---- + # U has shape (..., K, N). + up = dpnp.triu(lu_compact[..., :k, :]) + + # ---- Convert pivot indices → row permutation ---- + # ``perm`` (forward): A[perm] = L @ U. + # This is the only step that requires a host transfer because the + # sequential swap semantics of LAPACK pivots cannot be parallelised. + # Only the small pivot array (min(M, N) elements per slice) is + # transferred; all subsequent work stays on the device. + perm = _pivots_to_permutation(piv, m) + + # ``inv_perm`` (inverse): A = L[inv_perm] @ U. + # This is SciPy's ``p_indices`` convention. + # ``dpnp.argsort`` is an efficient on-device O(M log M) operation + # that avoids a second host round-trip. + inv_perm = dpnp.argsort(perm, axis=-1).astype(dpnp.int64) + + # ---- Assemble output (SciPy convention) ---- if permute_l: # Return (PL, U) where PL = P @ L = L[inv_perm]. # A = PL @ U directly. - _PL = _apply_permutation_to_rows(_L, inv_perm) - return _PL, _U + perm_low = _apply_permutation_to_rows(low, inv_perm) + return perm_low, up if p_indices: # SciPy convention: A = L[inv_perm] @ U. - return inv_perm, _L, _U + return inv_perm, low, up # ---- Build full permutation matrix P = I[inv_perm] ---- # P has shape (..., M, M) with real dtype (SciPy convention). # The gather from an identity matrix is efficient on device: # each output row selects one row of the identity (one hot encoding). - _I = dpnp.eye( + eye_m = dpnp.eye( m, dtype=real_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) - _P = _apply_permutation_to_rows(_I, inv_perm) + perm_matrix = _apply_permutation_to_rows(eye_m, inv_perm) - return _P, _L, _U + return perm_matrix, low, up def dpnp_lu_solve(lu, piv, b, trans=0, overwrite_b=False, check_finite=True): From 67314b7a3de00d6e590cb6dc8e309844fda0f66e Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty <59661409+abagusetty@users.noreply.github.com> Date: Mon, 2 Mar 2026 09:18:08 -0600 Subject: [PATCH 05/23] Update dpnp/scipy/linalg/_utils.py Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- dpnp/scipy/linalg/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/scipy/linalg/_utils.py b/dpnp/scipy/linalg/_utils.py index 76e0db6752b0..00dddd78bed1 100644 --- a/dpnp/scipy/linalg/_utils.py +++ b/dpnp/scipy/linalg/_utils.py @@ -112,7 +112,7 @@ def _apply_permutation_to_rows(mat, perm_indices): return mat[perm_indices] # Batched case: ensure *mat* has the same batch dimensions as - # *perm_indices*. This is needed, for example, when permuting + # *perm_indices*. This is needed, for example, when permuting # a shared identity matrix across a batch. target_shape = perm_indices.shape[:-1] + mat.shape[-2:] if mat.shape != target_shape: From 384618b0c2ee2d90decc1cdc7de02f7a0ad23fc1 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty <59661409+abagusetty@users.noreply.github.com> Date: Mon, 2 Mar 2026 09:18:32 -0600 Subject: [PATCH 06/23] Update dpnp/scipy/linalg/_utils.py Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- dpnp/scipy/linalg/_utils.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/dpnp/scipy/linalg/_utils.py b/dpnp/scipy/linalg/_utils.py index 00dddd78bed1..12b32fb5dc0b 100644 --- a/dpnp/scipy/linalg/_utils.py +++ b/dpnp/scipy/linalg/_utils.py @@ -596,12 +596,7 @@ def dpnp_lu( if not dpnp.isfinite(a).all(): raise ValueError("array must not contain infs or NaNs") - low = dpnp.ones( - a.shape, - dtype=res_type, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) + low = dpnp.ones_like(a, dtype=res_type) up = dpnp.array(a, dtype=res_type) inv_perm = dpnp.zeros( (*batch_shape, 1), From 5b83946c5f7c05c0cf77afc32c1ae798cdba3ed4 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Mon, 2 Mar 2026 15:28:35 +0000 Subject: [PATCH 07/23] udapte CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f177be311f84..cef92676b708 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum * Added implementation of `dpnp.ndarray.__bytes__` method [#2671](https://github.com/IntelPython/dpnp/pull/2671) * Added implementation of `dpnp.divmod` [#2674](https://github.com/IntelPython/dpnp/pull/2674) * Added implementation of `dpnp.isin` function [#2595](https://github.com/IntelPython/dpnp/pull/2595) +* Added implementation of `dpnp.scipy.linalg.lu` for 2D inputs (SciPy-compatible) [#2787](https://github.com/IntelPython/dpnp/pull/2787) ### Changed From 7161aef58e26983fcab25986cae5d4f7b376eeaa Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Mon, 2 Mar 2026 17:20:23 +0000 Subject: [PATCH 08/23] address comments from PR --- dpnp/scipy/linalg/_utils.py | 134 ++++++++---------- .../linalg_tests/test_decomp_lu.py | 15 +- 2 files changed, 65 insertions(+), 84 deletions(-) diff --git a/dpnp/scipy/linalg/_utils.py b/dpnp/scipy/linalg/_utils.py index 12b32fb5dc0b..f74c78381577 100644 --- a/dpnp/scipy/linalg/_utils.py +++ b/dpnp/scipy/linalg/_utils.py @@ -49,7 +49,7 @@ import dpnp import dpnp.backend.extensions.lapack._lapack_impl as li from dpnp.dpnp_utils import get_usm_allocations -from dpnp.linalg.dpnp_utils_linalg import _common_type +from dpnp.linalg.dpnp_utils_linalg import _common_type, _real_type def _align_lu_solve_broadcast(lu, b): @@ -125,24 +125,6 @@ def _apply_permutation_to_rows(mat, perm_indices): return dpnp.take_along_axis(mat, idx, axis=-2) -def _get_real_dtype(res_type): - """ - Return the real floating-point counterpart of *res_type*. - - SciPy uses the real dtype for the permutation matrix ``P`` even when - the input is complex (``P`` only contains 0s and 1s). - - ``float32`` and ``complex64`` → ``float32``; - ``float64`` and ``complex128`` → ``float64``. - """ - - if dpnp.issubdtype(res_type, dpnp.complexfloating): - return ( - dpnp.float32 if dpnp.dtype(res_type).itemsize <= 8 else dpnp.float64 - ) - return res_type - - def _batched_lu_factor_scipy(a, res_type): # pylint: disable=too-many-locals """SciPy-compatible LU factorization for batched inputs.""" @@ -586,7 +568,7 @@ def dpnp_lu( # The permutation matrix P uses a real dtype (SciPy convention): # P only contains 0s and 1s, so complex storage would be wasteful. - real_type = _get_real_dtype(res_type) + real_type = _real_type(res_type) # ---- Fast path: scalar (1x1) matrices ---- # For 1x1 input, P = I, L = I, U = A. This avoids invoking LAPACK @@ -596,70 +578,70 @@ def dpnp_lu( if not dpnp.isfinite(a).all(): raise ValueError("array must not contain infs or NaNs") - low = dpnp.ones_like(a, dtype=res_type) - up = dpnp.array(a, dtype=res_type) - inv_perm = dpnp.zeros( - (*batch_shape, 1), - dtype=dpnp.int64, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, + low = dpnp.ones_like(a, shape=a.shape, dtype=res_type) + up = dpnp.astype(a, res_type, copy=not overwrite_a) + inv_perm = dpnp.zeros_like(a, shape=(*batch_shape, 1), dtype=dpnp.int64) + + if permute_l: + # P = I, so PL = L unchanged; no gather needed. + return low, up + if p_indices: + return inv_perm, low, up + # P = I exactly: return eye directly, no gather needed. + eye_m = dpnp.eye( + m, dtype=real_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue ) + return eye_m, low, up # ---- Fast path: empty arrays ---- - elif a.size == 0: - low = dpnp.empty( - (*batch_shape, m, k), - dtype=res_type, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) - up = dpnp.empty( - (*batch_shape, k, n), - dtype=res_type, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) - inv_perm = dpnp.empty( - (*batch_shape, m), - dtype=dpnp.int64, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, + if a.size == 0: + low = dpnp.empty_like(a, shape=(*batch_shape, m, k), dtype=res_type) + up = dpnp.empty_like(a, shape=(*batch_shape, k, n), dtype=res_type) + inv_perm = dpnp.empty_like(a, shape=(*batch_shape, m), dtype=dpnp.int64) + + if permute_l: + return low, up + if p_indices: + return inv_perm, low, up + # P is empty: allocate directly, no eye + gather needed. + perm_matrix = dpnp.empty_like( + a, shape=(*batch_shape, m, m), dtype=real_type ) + return perm_matrix, low, up # ---- General case: LAPACK factorization ---- - else: - lu_compact, piv = dpnp_lu_factor( - a, overwrite_a=overwrite_a, check_finite=check_finite - ) + lu_compact, piv = dpnp_lu_factor( + a, overwrite_a=overwrite_a, check_finite=check_finite + ) - # ---- Extract L: lower-triangular with unit diagonal ---- - # L has shape (..., M, K). - low = dpnp.tril(lu_compact[..., :, :k], k=-1) - low += dpnp.eye( - m, - k, - dtype=lu_compact.dtype, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) + # ---- Extract L: lower-triangular with unit diagonal ---- + # L has shape (..., M, K). + low = dpnp.tril(lu_compact[..., :, :k], k=-1) + low += dpnp.eye( + m, + k, + dtype=lu_compact.dtype, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) - # ---- Extract U: upper-triangular ---- - # U has shape (..., K, N). - up = dpnp.triu(lu_compact[..., :k, :]) - - # ---- Convert pivot indices → row permutation ---- - # ``perm`` (forward): A[perm] = L @ U. - # This is the only step that requires a host transfer because the - # sequential swap semantics of LAPACK pivots cannot be parallelised. - # Only the small pivot array (min(M, N) elements per slice) is - # transferred; all subsequent work stays on the device. - perm = _pivots_to_permutation(piv, m) - - # ``inv_perm`` (inverse): A = L[inv_perm] @ U. - # This is SciPy's ``p_indices`` convention. - # ``dpnp.argsort`` is an efficient on-device O(M log M) operation - # that avoids a second host round-trip. - inv_perm = dpnp.argsort(perm, axis=-1).astype(dpnp.int64) + # ---- Extract U: upper-triangular ---- + # U has shape (..., K, N). + up = dpnp.triu(lu_compact[..., :k, :]) + + # ---- Convert pivot indices → row permutation ---- + # ``perm`` (forward): A[perm] = L @ U. + # This is the only step that requires a host transfer because the + # sequential swap semantics of LAPACK pivots cannot be parallelised. + # Only the small pivot array (min(M, N) elements per slice) is + # transferred; all subsequent work stays on the device. + perm = _pivots_to_permutation(piv, m) + + # ``inv_perm`` (inverse): A = L[inv_perm] @ U. + # This is SciPy's ``p_indices`` convention. + # ``dpnp.argsort`` is an efficient on-device O(M log M) operation + # that avoids a second host round-trip. + inv_perm = dpnp.argsort(perm, axis=-1).astype(dpnp.int64) # ---- Assemble output (SciPy convention) ---- if permute_l: diff --git a/dpnp/tests/third_party/cupyx/scipy_tests/linalg_tests/test_decomp_lu.py b/dpnp/tests/third_party/cupyx/scipy_tests/linalg_tests/test_decomp_lu.py index fb51c3e39244..440521419652 100644 --- a/dpnp/tests/third_party/cupyx/scipy_tests/linalg_tests/test_decomp_lu.py +++ b/dpnp/tests/third_party/cupyx/scipy_tests/linalg_tests/test_decomp_lu.py @@ -124,7 +124,6 @@ def test_lu_factor_reconstruction_singular(self, dtype): ) @testing.fix_random() @testing.with_requires("scipy") -@pytest.mark.skip("lu() is not supported yet") class TestLU(unittest.TestCase): @testing.for_dtypes("fdFD") @@ -132,7 +131,7 @@ def test_lu(self, dtype): a_cpu = testing.shaped_random(self.shape, numpy, dtype=dtype) a_gpu = cupy.asarray(a_cpu) result_cpu = scipy.linalg.lu(a_cpu, permute_l=self.permute_l) - result_gpu = cupy.linalg.lu(a_gpu, permute_l=self.permute_l) + result_gpu = cupy.scipy.linalg.lu(a_gpu, permute_l=self.permute_l) assert len(result_cpu) == len(result_gpu) if not self.permute_l: # check permutation matrix @@ -140,22 +139,22 @@ def test_lu(self, dtype): result_gpu = list(result_gpu) P_cpu = result_cpu.pop(0) P_gpu = result_gpu.pop(0) - cupy.testing.assert_array_equal(P_gpu, P_cpu) - cupy.testing.assert_allclose(result_gpu[0], result_cpu[0], atol=1e-5) - cupy.testing.assert_allclose(result_gpu[1], result_cpu[1], atol=1e-5) + testing.assert_array_equal(P_gpu, P_cpu) + testing.assert_allclose(result_gpu[0], result_cpu[0], atol=1e-5) + testing.assert_allclose(result_gpu[1], result_cpu[1], atol=1e-5) @testing.for_dtypes("fdFD") def test_lu_reconstruction(self, dtype): m, n = self.shape A = testing.shaped_random(self.shape, cupy, dtype=dtype) if self.permute_l: - PL, U = cupy.linalg.lu(A, permute_l=self.permute_l) + PL, U = cupy.scipy.linalg.lu(A, permute_l=self.permute_l) PLU = PL @ U else: - P, L, U = cupy.linalg.lu(A, permute_l=self.permute_l) + P, L, U = cupy.scipy.linalg.lu(A, permute_l=self.permute_l) PLU = P @ L @ U # check that reconstruction is close to original - cupy.testing.assert_allclose(PLU, A, atol=1e-5) + testing.assert_allclose(PLU, A, atol=1e-5) @testing.parameterize( From ac8117ba370162c6c8a7314cda821148dc7d9033 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Mon, 2 Mar 2026 17:37:39 +0000 Subject: [PATCH 09/23] Fix pylint multiple returns --- dpnp/scipy/linalg/_utils.py | 94 ++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 39 deletions(-) diff --git a/dpnp/scipy/linalg/_utils.py b/dpnp/scipy/linalg/_utils.py index f74c78381577..7f9dff4b2847 100644 --- a/dpnp/scipy/linalg/_utils.py +++ b/dpnp/scipy/linalg/_utils.py @@ -539,6 +539,32 @@ def dpnp_lu_factor(a, overwrite_a=False, check_finite=True): return (a_h, ipiv_h) +def _assemble_lu_output( + low, + up, + inv_perm, + permute_l, + p_indices, + m, + real_type, + a_usm_type, + a_sycl_queue, +): + """Select and build the correct dpnp_lu return value.""" + if permute_l: + return _apply_permutation_to_rows(low, inv_perm), up + if p_indices: + return inv_perm, low, up + eye_m = dpnp.eye( + m, dtype=real_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) + return ( + _apply_permutation_to_rows(eye_m, inv_perm), + low, + up, + ) # perm_matrix, L, U + + def dpnp_lu( a, overwrite_a=False, @@ -582,32 +608,34 @@ def dpnp_lu( up = dpnp.astype(a, res_type, copy=not overwrite_a) inv_perm = dpnp.zeros_like(a, shape=(*batch_shape, 1), dtype=dpnp.int64) - if permute_l: - # P = I, so PL = L unchanged; no gather needed. - return low, up - if p_indices: - return inv_perm, low, up - # P = I exactly: return eye directly, no gather needed. - eye_m = dpnp.eye( - m, dtype=real_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue + return _assemble_lu_output( + low, + up, + inv_perm, + permute_l, + p_indices, + m, + real_type, + a_usm_type, + a_sycl_queue, ) - return eye_m, low, up # ---- Fast path: empty arrays ---- if a.size == 0: low = dpnp.empty_like(a, shape=(*batch_shape, m, k), dtype=res_type) up = dpnp.empty_like(a, shape=(*batch_shape, k, n), dtype=res_type) inv_perm = dpnp.empty_like(a, shape=(*batch_shape, m), dtype=dpnp.int64) - - if permute_l: - return low, up - if p_indices: - return inv_perm, low, up - # P is empty: allocate directly, no eye + gather needed. - perm_matrix = dpnp.empty_like( - a, shape=(*batch_shape, m, m), dtype=real_type + return _assemble_lu_output( + low, + up, + inv_perm, + permute_l, + p_indices, + m, + real_type, + a_usm_type, + a_sycl_queue, ) - return perm_matrix, low, up # ---- General case: LAPACK factorization ---- lu_compact, piv = dpnp_lu_factor( @@ -644,29 +672,17 @@ def dpnp_lu( inv_perm = dpnp.argsort(perm, axis=-1).astype(dpnp.int64) # ---- Assemble output (SciPy convention) ---- - if permute_l: - # Return (PL, U) where PL = P @ L = L[inv_perm]. - # A = PL @ U directly. - perm_low = _apply_permutation_to_rows(low, inv_perm) - return perm_low, up - - if p_indices: - # SciPy convention: A = L[inv_perm] @ U. - return inv_perm, low, up - - # ---- Build full permutation matrix P = I[inv_perm] ---- - # P has shape (..., M, M) with real dtype (SciPy convention). - # The gather from an identity matrix is efficient on device: - # each output row selects one row of the identity (one hot encoding). - eye_m = dpnp.eye( + return _assemble_lu_output( + low, + up, + inv_perm, + permute_l, + p_indices, m, - dtype=real_type, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, + real_type, + a_usm_type, + a_sycl_queue, ) - perm_matrix = _apply_permutation_to_rows(eye_m, inv_perm) - - return perm_matrix, low, up def dpnp_lu_solve(lu, piv, b, trans=0, overwrite_b=False, check_finite=True): From 3033c0b5be1482b3169815c36bb226db2d1ed87e Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Mon, 2 Mar 2026 18:17:11 +0000 Subject: [PATCH 10/23] Fix the test failing in onemkl tests --- dpnp/tests/test_sycl_queue.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py index b0155658a4df..47eb75b05204 100644 --- a/dpnp/tests/test_sycl_queue.py +++ b/dpnp/tests/test_sycl_queue.py @@ -1654,10 +1654,11 @@ def test_lstsq(self, m, n, nrhs, device): assert_sycl_queue_equal(param_queue, a.sycl_queue) assert_sycl_queue_equal(param_queue, b.sycl_queue) - @pytest.mark.parametrize( - "data", + _LU_TEST_DATA = ( [[[1.0, 2.0], [3.0, 5.0]], [[]], [[[1.0, 2.0], [3.0, 5.0]]], [[[]]]], ) + + @pytest.mark.parametrize("data", _LU_TEST_DATA) def test_lu_factor(self, data, device): a = dpnp.array(data, device=device) result = dpnp.scipy.linalg.lu_factor(a) @@ -1666,6 +1667,7 @@ def test_lu_factor(self, data, device): param_queue = param.sycl_queue assert_sycl_queue_equal(param_queue, a.sycl_queue) + @pytest.mark.parametrize("data", _LU_TEST_DATA) def test_lu(self, data, device): a = dpnp.array(data, device=device) result = dpnp.scipy.linalg.lu(a) From cdb5e324bb94e1753985104e66a32677740a1571 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Mon, 2 Mar 2026 19:13:00 +0000 Subject: [PATCH 11/23] one more try.. --- dpnp/tests/test_sycl_queue.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py index 47eb75b05204..ebe356adbbfc 100644 --- a/dpnp/tests/test_sycl_queue.py +++ b/dpnp/tests/test_sycl_queue.py @@ -1654,11 +1654,10 @@ def test_lstsq(self, m, n, nrhs, device): assert_sycl_queue_equal(param_queue, a.sycl_queue) assert_sycl_queue_equal(param_queue, b.sycl_queue) - _LU_TEST_DATA = ( + @pytest.mark.parametrize( + "data", [[[1.0, 2.0], [3.0, 5.0]], [[]], [[[1.0, 2.0], [3.0, 5.0]]], [[[]]]], ) - - @pytest.mark.parametrize("data", _LU_TEST_DATA) def test_lu_factor(self, data, device): a = dpnp.array(data, device=device) result = dpnp.scipy.linalg.lu_factor(a) @@ -1667,7 +1666,10 @@ def test_lu_factor(self, data, device): param_queue = param.sycl_queue assert_sycl_queue_equal(param_queue, a.sycl_queue) - @pytest.mark.parametrize("data", _LU_TEST_DATA) + @pytest.mark.parametrize( + "data", + [[[1.0, 2.0], [3.0, 5.0]], [[]], [[[1.0, 2.0], [3.0, 5.0]]], [[[]]]], + ) def test_lu(self, data, device): a = dpnp.array(data, device=device) result = dpnp.scipy.linalg.lu(a) From d3dc5da7e4433c41a0a02f5cc5d202b4951d074e Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Mon, 2 Mar 2026 20:16:51 +0000 Subject: [PATCH 12/23] fix one more... --- dpnp/tests/test_usm_type.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py index 59a6b05f688e..b73eb67d51ee 100644 --- a/dpnp/tests/test_usm_type.py +++ b/dpnp/tests/test_usm_type.py @@ -1539,6 +1539,10 @@ def test_lu(self, data, usm_type): for param in result: assert param.usm_type == a.usm_type + @pytest.mark.parametrize( + "data", + [[[1.0, 2.0], [3.0, 5.0]], [[]], [[[1.0, 2.0], [3.0, 5.0]]], [[[]]]], + ) def test_lu_factor(self, data, usm_type): a = dpnp.array(data, usm_type=usm_type) result = dpnp.scipy.linalg.lu_factor(a) From 6e94734891eca13ffc1f89de9e84280ae1586655 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty <59661409+abagusetty@users.noreply.github.com> Date: Tue, 3 Mar 2026 09:46:16 -0600 Subject: [PATCH 13/23] Update dpnp/tests/test_linalg.py Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- dpnp/tests/test_linalg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py index 39ddff426973..f9434d4e654a 100644 --- a/dpnp/tests/test_linalg.py +++ b/dpnp/tests/test_linalg.py @@ -2643,7 +2643,7 @@ def test_lu_default(self, shape, order, dtype): [(1, 1), (2, 2), (3, 3), (1, 5), (5, 1), (2, 5), (5, 2)], ) @pytest.mark.parametrize("order", ["C", "F"]) - @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True, no_bool=True)) def test_lu_permute_l(self, shape, order, dtype): a_np = self._make_nonsingular_np(shape, dtype, order) a_dp = dpnp.array(a_np, order=order) From aa985e1e185b822162ed8d36463b0b3907adf571 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty <59661409+abagusetty@users.noreply.github.com> Date: Tue, 3 Mar 2026 09:46:31 -0600 Subject: [PATCH 14/23] Update dpnp/tests/test_linalg.py Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- dpnp/tests/test_linalg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py index f9434d4e654a..8cbf8ddf1c37 100644 --- a/dpnp/tests/test_linalg.py +++ b/dpnp/tests/test_linalg.py @@ -2835,6 +2835,7 @@ def test_strided(self, sl): A_rec = P @ L @ U assert dpnp.allclose(A_rec, a_dp, rtol=1e-6, atol=1e-6) + @pytest.mark.filterwarnings("ignore::RuntimeWarning") def test_singular_matrix(self): a_np = numpy.array([[1.0, 2.0], [2.0, 4.0]]) a_dp = dpnp.array(a_np) From 05e3a0faddcda965d070cb3f8ba2cd348e51492c Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty <59661409+abagusetty@users.noreply.github.com> Date: Tue, 3 Mar 2026 09:46:42 -0600 Subject: [PATCH 15/23] Update dpnp/tests/test_linalg.py Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- dpnp/tests/test_linalg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py index 8cbf8ddf1c37..35ef4cb16d6b 100644 --- a/dpnp/tests/test_linalg.py +++ b/dpnp/tests/test_linalg.py @@ -3047,6 +3047,7 @@ def test_strided(self): A_orig = dpnp.asnumpy(a_stride[i].astype(L.dtype, copy=False)) assert_allclose(A_rec, A_orig, rtol=1e-6, atol=1e-6) + @pytest.mark.filterwarnings("ignore::RuntimeWarning") def test_singular_matrix(self): a = dpnp.zeros((3, 2, 2), dtype=dpnp.default_float_type()) a[0] = dpnp.array([[1.0, 2.0], [2.0, 4.0]]) From c7a117a4244b94b2d0f33c5dd53bfbb421d2eecb Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty <59661409+abagusetty@users.noreply.github.com> Date: Tue, 3 Mar 2026 09:46:55 -0600 Subject: [PATCH 16/23] Update CHANGELOG.md Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cef92676b708..61cde1ddfefc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,7 +26,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum * Added implementation of `dpnp.ndarray.__bytes__` method [#2671](https://github.com/IntelPython/dpnp/pull/2671) * Added implementation of `dpnp.divmod` [#2674](https://github.com/IntelPython/dpnp/pull/2674) * Added implementation of `dpnp.isin` function [#2595](https://github.com/IntelPython/dpnp/pull/2595) -* Added implementation of `dpnp.scipy.linalg.lu` for 2D inputs (SciPy-compatible) [#2787](https://github.com/IntelPython/dpnp/pull/2787) +* Added implementation of `dpnp.scipy.linalg.lu` (SciPy-compatible) [#2787](https://github.com/IntelPython/dpnp/pull/2787) ### Changed From 4bc7226943737e0dc1ad86996eac062838f4b6b9 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty <59661409+abagusetty@users.noreply.github.com> Date: Tue, 3 Mar 2026 09:47:06 -0600 Subject: [PATCH 17/23] Update dpnp/scipy/linalg/_utils.py Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- dpnp/scipy/linalg/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/scipy/linalg/_utils.py b/dpnp/scipy/linalg/_utils.py index 7f9dff4b2847..d083f1c2c0a2 100644 --- a/dpnp/scipy/linalg/_utils.py +++ b/dpnp/scipy/linalg/_utils.py @@ -604,7 +604,7 @@ def dpnp_lu( if not dpnp.isfinite(a).all(): raise ValueError("array must not contain infs or NaNs") - low = dpnp.ones_like(a, shape=a.shape, dtype=res_type) + low = dpnp.ones_like(a, dtype=res_type) up = dpnp.astype(a, res_type, copy=not overwrite_a) inv_perm = dpnp.zeros_like(a, shape=(*batch_shape, 1), dtype=dpnp.int64) From 643f34d5ab5f477ce90205ee22cf9a5cf4a9b440 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty <59661409+abagusetty@users.noreply.github.com> Date: Tue, 3 Mar 2026 09:47:21 -0600 Subject: [PATCH 18/23] Update dpnp/tests/test_linalg.py Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- dpnp/tests/test_linalg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py index 35ef4cb16d6b..7e9f5c658634 100644 --- a/dpnp/tests/test_linalg.py +++ b/dpnp/tests/test_linalg.py @@ -2621,7 +2621,7 @@ def _make_nonsingular_np(shape, dtype, order): [(1, 1), (2, 2), (3, 3), (1, 5), (5, 1), (2, 5), (5, 2)], ) @pytest.mark.parametrize("order", ["C", "F"]) - @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True, no_bool=True)) def test_lu_default(self, shape, order, dtype): a_np = self._make_nonsingular_np(shape, dtype, order) a_dp = dpnp.array(a_np, order=order) From 28f26ebf276502d2455061a343356d3b1b41fc84 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty <59661409+abagusetty@users.noreply.github.com> Date: Tue, 3 Mar 2026 09:47:33 -0600 Subject: [PATCH 19/23] Update dpnp/tests/test_linalg.py Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- dpnp/tests/test_linalg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py index 7e9f5c658634..280000d9ec86 100644 --- a/dpnp/tests/test_linalg.py +++ b/dpnp/tests/test_linalg.py @@ -2664,7 +2664,7 @@ def test_lu_permute_l(self, shape, order, dtype): [(1, 1), (2, 2), (3, 3), (1, 5), (5, 1), (2, 5), (5, 2)], ) @pytest.mark.parametrize("order", ["C", "F"]) - @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True, no_bool=True)) def test_lu_p_indices(self, shape, order, dtype): a_np = self._make_nonsingular_np(shape, dtype, order) a_dp = dpnp.array(a_np, order=order) From aa00062fa22df5a8b21522431f5862b39d61b6dc Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty <59661409+abagusetty@users.noreply.github.com> Date: Tue, 3 Mar 2026 11:27:03 -0600 Subject: [PATCH 20/23] Update dpnp/tests/test_linalg.py Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- dpnp/tests/test_linalg.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py index 280000d9ec86..dc4f768049e4 100644 --- a/dpnp/tests/test_linalg.py +++ b/dpnp/tests/test_linalg.py @@ -2685,19 +2685,9 @@ def test_lu_p_indices(self, shape, order, dtype): A_cast = a_dp.astype(L.dtype, copy=False) assert_allclose(A_rec, dpnp.asnumpy(A_cast), rtol=1e-6, atol=1e-6) - @pytest.mark.parametrize( - "in_dtype, expected_p_dtype", - [ - (dpnp.float32, dpnp.float32), - (dpnp.float64, dpnp.float64), - (dpnp.complex64, dpnp.float32), - (dpnp.complex128, dpnp.float64), - ], - ) - def test_p_matrix_dtype(self, in_dtype, expected_p_dtype): - if in_dtype in (dpnp.float64, dpnp.complex128): - if not has_support_aspect64(): - pytest.skip("fp64 not supported on this device") + @pytest.mark.parametrize("in_dtype", get_float_complex_dtypes()) + def test_p_matrix_dtype(self, in_dtype): + expected_p_dtype = numpy.dtype(in_dtype).char.lower() a_np = self._make_nonsingular_np((4, 4), in_dtype, "F") a_dp = dpnp.array(a_np, order="F") From 9654380dd2b8aeee85b0781508216d0b835ddcd1 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty <59661409+abagusetty@users.noreply.github.com> Date: Wed, 4 Mar 2026 00:14:34 -0600 Subject: [PATCH 21/23] Update dpnp/tests/test_linalg.py Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- dpnp/tests/test_linalg.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py index dc4f768049e4..34e4111414e3 100644 --- a/dpnp/tests/test_linalg.py +++ b/dpnp/tests/test_linalg.py @@ -2964,14 +2964,6 @@ def test_overwrite_a(self, dtype, order): def test_modes_consistency_batched(self, dtype): a_np = self._make_nonsingular_nd_np((3, 4, 4), dtype, "F") a_dp = dpnp.array(a_np, order="F") - A_cast = a_dp.astype( - ( - dpnp.complex128 - if dpnp.issubdtype(dtype, dpnp.complexfloating) - else dpnp.float64 - ), - copy=False, - ) P, L, U = dpnp.scipy.linalg.lu(a_dp) PL, U2 = dpnp.scipy.linalg.lu(a_dp, permute_l=True) From 5de153df74c0b5642636d1e8b6e5f96a7d52b6f6 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty <59661409+abagusetty@users.noreply.github.com> Date: Wed, 4 Mar 2026 00:16:26 -0600 Subject: [PATCH 22/23] Update dpnp/tests/test_linalg.py Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- dpnp/tests/test_linalg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py index 34e4111414e3..2550576ccb2e 100644 --- a/dpnp/tests/test_linalg.py +++ b/dpnp/tests/test_linalg.py @@ -2683,7 +2683,7 @@ def test_lu_p_indices(self, shape, order, dtype): U_np = dpnp.asnumpy(U) A_rec = L_np[p_np] @ U_np A_cast = a_dp.astype(L.dtype, copy=False) - assert_allclose(A_rec, dpnp.asnumpy(A_cast), rtol=1e-6, atol=1e-6) + assert dpnp.allclose(A_rec, A_cast, rtol=1e-6, atol=1e-6) @pytest.mark.parametrize("in_dtype", get_float_complex_dtypes()) def test_p_matrix_dtype(self, in_dtype): From 69c51d890cddeb00d0dbb4e3097659101cfc2afd Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Wed, 4 Mar 2026 06:31:18 +0000 Subject: [PATCH 23/23] Fix the test_linalg.py, add a test for scalar --- dpnp/tests/test_linalg.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py index 2550576ccb2e..7d8018fa83a2 100644 --- a/dpnp/tests/test_linalg.py +++ b/dpnp/tests/test_linalg.py @@ -2621,7 +2621,9 @@ def _make_nonsingular_np(shape, dtype, order): [(1, 1), (2, 2), (3, 3), (1, 5), (5, 1), (2, 5), (5, 2)], ) @pytest.mark.parametrize("order", ["C", "F"]) - @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True, no_bool=True)) + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_none=True, no_bool=True) + ) def test_lu_default(self, shape, order, dtype): a_np = self._make_nonsingular_np(shape, dtype, order) a_dp = dpnp.array(a_np, order=order) @@ -2643,7 +2645,9 @@ def test_lu_default(self, shape, order, dtype): [(1, 1), (2, 2), (3, 3), (1, 5), (5, 1), (2, 5), (5, 2)], ) @pytest.mark.parametrize("order", ["C", "F"]) - @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True, no_bool=True)) + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_none=True, no_bool=True) + ) def test_lu_permute_l(self, shape, order, dtype): a_np = self._make_nonsingular_np(shape, dtype, order) a_dp = dpnp.array(a_np, order=order) @@ -2664,7 +2668,9 @@ def test_lu_permute_l(self, shape, order, dtype): [(1, 1), (2, 2), (3, 3), (1, 5), (5, 1), (2, 5), (5, 2)], ) @pytest.mark.parametrize("order", ["C", "F"]) - @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True, no_bool=True)) + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_none=True, no_bool=True) + ) def test_lu_p_indices(self, shape, order, dtype): a_np = self._make_nonsingular_np(shape, dtype, order) a_dp = dpnp.array(a_np, order=order) @@ -2678,10 +2684,7 @@ def test_lu_p_indices(self, shape, order, dtype): assert U.shape == (k, n) assert dpnp.issubdtype(p.dtype, dpnp.integer) - p_np = dpnp.asnumpy(p) - L_np = dpnp.asnumpy(L) - U_np = dpnp.asnumpy(U) - A_rec = L_np[p_np] @ U_np + A_rec = L[p] @ U A_cast = a_dp.astype(L.dtype, copy=False) assert dpnp.allclose(A_rec, A_cast, rtol=1e-6, atol=1e-6) @@ -2852,6 +2855,12 @@ def test_check_finite_raises(self, bad): a_dp = dpnp.array([[1.0, 2.0], [3.0, bad]], order="F") assert_raises(ValueError, dpnp.scipy.linalg.lu, a_dp, check_finite=True) + @pytest.mark.parametrize("bad", [numpy.inf, -numpy.inf, numpy.nan]) + def test_check_finite_raises_scalar(self, bad): + # Covers the 1x1 scalar fast path in dpnp_lu + a_dp = dpnp.array([[bad]]) + assert_raises(ValueError, dpnp.scipy.linalg.lu, a_dp, check_finite=True) + def test_check_finite_disabled(self): a_dp = dpnp.array([[1.0, numpy.nan], [3.0, 4.0]]) result = dpnp.scipy.linalg.lu(a_dp, check_finite=False)