From 957b54fbb79e5f29546d9514715fd470f4e1e9b2 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Thu, 2 Jul 2026 15:39:54 +0200 Subject: [PATCH 1/3] perf: drop explicit zeros from the constraint matrix (#814) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expressions that broadcast against a dense coordinate store one coefficient per pair, most of them structurally zero. Those explicit zeros were carried all the way into `matrices.A` and thus into every solver handoff. `highspy.Highs.addRows` (and the other direct backends' matrix loaders) scale with *stored* nnz, so the handoff spent most of its work describing zeros — e.g. sparse_network(250) stored 1.5M entries for 18k structural nonzeros (98.8% zeros). Prune them once, centrally, in `_stack` via `eliminate_zeros()`, so `A` and `indicator_A` — and hence HiGHS, gurobi, xpress, copt, mosek and the LP/MPS writers — all hand the solver only structural nonzeros. A zero coefficient never changes a constraint, so this is mathematically identical. Co-Authored-By: Claude Opus 4.8 (1M context) --- linopy/matrices.py | 14 ++++++++++++-- test/test_matrices.py | 21 +++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/linopy/matrices.py b/linopy/matrices.py index 0feba9c90..9c65d06a2 100644 --- a/linopy/matrices.py +++ b/linopy/matrices.py @@ -22,10 +22,20 @@ def _stack(csrs: list) -> scipy.sparse.csr_array | None: - """Vertically stack CSR blocks, or None when there are none.""" + """ + Vertically stack CSR blocks, or None when there are none. + + Explicit zeros are dropped: expressions that broadcast against a dense + coordinate store one coefficient per pair, most of them zero, and a zero + coefficient never changes a constraint. Keeping them only inflates the + stored nnz handed to the solvers/writers (e.g. ``highspy.addRows`` scales + with stored nnz), so we prune them once, centrally, for every backend. + """ if not csrs: return None - return cast(scipy.sparse.csr_array, scipy.sparse.vstack(csrs, format="csr")) + stacked = cast(scipy.sparse.csr_array, scipy.sparse.vstack(csrs, format="csr")) + stacked.eliminate_zeros() + return stacked def _concat(arrays: list, dtype: type | None = None) -> ndarray: diff --git a/test/test_matrices.py b/test/test_matrices.py index 98a73564b..6da3eaf1d 100644 --- a/test/test_matrices.py +++ b/test/test_matrices.py @@ -68,6 +68,27 @@ def test_matrices_duplicated_variables() -> None: assert np.isin(np.unique(np.array(A)), [0.0, 2.0]).all() +def test_matrices_drops_explicit_zeros() -> None: + # https://github.com/PyPSA/linopy/issues/814 + # Expressions that broadcast against a dense coordinate store one coefficient + # per pair, most of them structurally zero. Those must not reach A, whose + # stored nnz drives the direct-solver handoff (e.g. highspy.addRows). + m = Model() + + x = m.add_variables(coords=[range(4)], name="x") + coeff = xr.DataArray( + np.eye(4), dims=["j", "i"], coords={"j": range(4), "i": range(4)} + ) + m.add_constraints((coeff * x.rename(dim_0="i")).sum("i") <= 1, name="c") + + A = m.matrices.A + assert A is not None + # 4 structural nonzeros on the diagonal; the 12 broadcast zeros are dropped. + assert A.nnz == 4 + assert not (A.data == 0).any() + assert np.array_equal(A.todense(), np.eye(4)) + + def test_matrices_float_c() -> None: # https://github.com/PyPSA/linopy/issues/200 m = Model() From a3a55a29473a194920338f422dab718c01f178b6 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Thu, 2 Jul 2026 16:33:20 +0200 Subject: [PATCH 2/3] perf: prune explicit zeros at matrix assembly to cut build peak memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to #815. That eliminates zeros from the stacked constraint matrix after scipy.vstack has already materialised the full dense-with-zeros block, so the peak allocation (what the CodSpeed memory instrument tracks) is unchanged — only the resting size and solver-ingest cost drop. Prune at the source instead: fold `coeffs != 0` into the valid-entry mask in Constraint._matrix_export_data, so broadcast zeros never enter cols/data/the CSR. Snapshot capture and the matrix build share this path, so they stay consistent. Two ripples handled: - matrices.dual derived active rows from stored nnz (np.diff(indptr)); an all-zero-coefficient row would lose its dual slot once pruned. Derive active rows from row activity via a new ConstraintBase.active_row_mask (also avoids rebuilding the CSR just to count rows). - A zero-coefficient term no longer changes the sparsity pattern, so the persistent warm-start path no longer forces a SPARSITY rebuild for it. Test updated to use a non-zero coefficient; a no-rebuild case added. sparse_network(250): build peak 50 -> 36 MB (~27%), identical solver result. Co-Authored-By: Claude Opus 4.8 (1M context) --- linopy/constraints.py | 19 ++++++++++++++----- linopy/matrices.py | 5 +---- test/test_persistent_snapshot_buffers.py | 15 ++++++++++++++- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/linopy/constraints.py b/linopy/constraints.py index 0b9dbb0a6..28a678235 100644 --- a/linopy/constraints.py +++ b/linopy/constraints.py @@ -243,6 +243,10 @@ def to_matrix_with_rhs( def active_labels(self) -> np.ndarray: """Active constraint labels in build order, without building the CSR.""" + @abstractmethod + def active_row_mask(self) -> np.ndarray: + """Boolean mask over raveled rows selecting active constraint rows.""" + def __getitem__( self, selector: str | int | slice | list | tuple | dict ) -> Constraint: @@ -955,6 +959,9 @@ def to_matrix_with_rhs( def active_labels(self) -> np.ndarray: return self._con_labels + def active_row_mask(self) -> np.ndarray: + return np.ones(self._csr.shape[0], dtype=bool) + def sanitize_zeros(self) -> CSRConstraint: """ Remove terms with zero or near-zero coefficients. @@ -1531,9 +1538,8 @@ def _matrix_export_data( row_mask = (labels_flat != -1) & (vars_2d != -1).any(axis=1) con_labels = labels_flat[row_mask] vars_final = vars_2d[row_mask] - valid_final = vars_final != -1 - coeffs_final = self.coeffs.values.ravel().reshape(vars_2d.shape)[row_mask] + valid_final = (vars_final != -1) & (coeffs_final != 0) cols = label_to_pos[vars_final[valid_final]] data = coeffs_final[valid_final] @@ -1566,7 +1572,8 @@ def to_matrix( csr.sum_duplicates() return csr, con_labels - def active_labels(self) -> np.ndarray: + def active_row_mask(self) -> np.ndarray: + """Boolean mask over raveled rows: label set and at least one variable present.""" labels_flat = self.labels.values.ravel() vars_vals = self.vars.values n_rows = len(labels_flat) @@ -1575,8 +1582,10 @@ def active_labels(self) -> np.ndarray: if n_rows > 0 else vars_vals.reshape(0, max(1, vars_vals.size)) ) - row_mask = (labels_flat != -1) & (vars_2d != -1).any(axis=1) - return labels_flat[row_mask] + return (labels_flat != -1) & (vars_2d != -1).any(axis=1) + + def active_labels(self) -> np.ndarray: + return self.labels.values.ravel()[self.active_row_mask()] def to_matrix_with_rhs( self, label_index: VariableLabelIndex diff --git a/linopy/matrices.py b/linopy/matrices.py index 9c65d06a2..fb0ec2821 100644 --- a/linopy/matrices.py +++ b/linopy/matrices.py @@ -188,7 +188,6 @@ def dual(self) -> ndarray: if not self._parent.status == "ok": raise ValueError("Model is not optimized.") m = self._parent - label_index = m.variables.label_index dual_list = [] has_dual = False for c in m.constraints.data.values(): @@ -202,9 +201,7 @@ def dual(self) -> ndarray: else: dual_list.append(np.full(len(c._con_labels), np.nan)) else: - csr, _ = c.to_matrix(label_index) - nonempty = np.diff(csr.indptr).astype(bool) - active_rows = np.flatnonzero(nonempty) + active_rows = np.flatnonzero(c.active_row_mask()) if "dual" in c.data: dual_list.append(c.dual.values.ravel()[active_rows]) has_dual = True diff --git a/test/test_persistent_snapshot_buffers.py b/test/test_persistent_snapshot_buffers.py index bb801ecf2..76bfa7e63 100644 --- a/test/test_persistent_snapshot_buffers.py +++ b/test/test_persistent_snapshot_buffers.py @@ -58,7 +58,7 @@ def test_shape_mismatch_triggers_sparsity_rebuild(baseline_model: Model) -> None snap = ModelSnapshot.capture(baseline_model) x = baseline_model.variables["x"] y = baseline_model.variables["y"] - baseline_model.constraints["c1"].lhs = 2 * x + 0 * y.sum() + baseline_model.constraints["c1"].lhs = 2 * x + 1 * y.sum() diff = ModelDiff.from_snapshot(snap, baseline_model) assert diff in { RebuildReason.SPARSITY, @@ -66,6 +66,19 @@ def test_shape_mismatch_triggers_sparsity_rebuild(baseline_model: Model) -> None } +def test_zero_coefficient_term_needs_no_rebuild(baseline_model: Model) -> None: + """ + An explicit zero coefficient is pruned from the matrix, so adding one + leaves the sparsity pattern unchanged and needs no rebuild. + """ + snap = ModelSnapshot.capture(baseline_model) + x = baseline_model.variables["x"] + y = baseline_model.variables["y"] + baseline_model.constraints["c1"].lhs = 2 * x + 0 * y.sum() + diff = ModelDiff.from_snapshot(snap, baseline_model) + assert not isinstance(diff, RebuildReason) + + def test_zero_row_container_capture() -> None: m = Model() m.add_variables(0, 10, coords=[range(2)], name="x") From 97ab6921840af2ad38f969252ba2356ec213c3e2 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Thu, 2 Jul 2026 16:38:10 +0200 Subject: [PATCH 3/3] ci: trigger CI Co-Authored-By: Claude Opus 4.8 (1M context)