Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Upcoming Version

*Other*

* Default internal integer labels to ``int32`` (configurable via ``linopy.options["label_dtype"]``, set to ``np.int64`` for the old behavior), cutting memory ~25% and speeding up model build 10-35%. Models exceeding the int32 maximum automatically widen their labels to a larger int dtype instead of raising; ``label_dtype`` acts as a floor.
* ``add_variables(binary=True, ...)`` now accepts ``lower``/``upper`` bounds, as long as they are 0 or 1. Previously binary bounds could only be set via the ``.lower``/``.upper`` setters after creation. (https://github.com/PyPSA/linopy/issues/776)
* ``add_piecewise_formulation`` gained an ``active_fill`` parameter that gates a partial ``active`` (defined over a subset of the indexed dimension, or masked) as always-active (``1``) or always-off (``0``); without it, a partial ``active`` — which was previously zeroed silently — now raises. Useful when one formulation mixes gated and ungated entities (e.g. committable and non-committable units sharing a ``status``). ``active_fill`` is transitional and will be removed once v1 semantics make ``active.reindex(coords).fillna(value)`` sufficient. (https://github.com/PyPSA/linopy/issues/796)

Expand Down
31 changes: 26 additions & 5 deletions linopy/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from __future__ import annotations

import operator
import os
from collections.abc import Callable, Generator, Hashable, Iterable, Sequence
from functools import cached_property, reduce, wraps
from pathlib import Path
Expand Down Expand Up @@ -159,12 +158,10 @@ def infer_schema_polars(ds: Dataset) -> dict[str, DataTypeClass]:
dict: A dictionary mapping column names to their corresponding Polars data types.
"""
schema: dict[str, DataTypeClass] = {}
np_major_version = int(np.__version__.split(".")[0])
use_int32 = os.name == "nt" and np_major_version < 2
for name, array in ds.items():
name = str(name)
if np.issubdtype(array.dtype, np.integer):
schema[name] = pl.Int32 if use_int32 else pl.Int64
schema[name] = pl.Int32 if array.dtype.itemsize <= 4 else pl.Int64
elif np.issubdtype(array.dtype, np.floating):
schema[name] = pl.Float64
elif np.issubdtype(array.dtype, np.bool_):
Expand Down Expand Up @@ -308,7 +305,7 @@ def save_join(*dataarrays: DataArray, integer_dtype: bool = False) -> Dataset:
)
arrs = xr_align(*dataarrays, join="outer")
if integer_dtype:
arrs = tuple([ds.fillna(-1).astype(int) for ds in arrs])
arrs = tuple([astype_labels(ds) for ds in arrs])
return Dataset({ds.name: ds for ds in arrs})


Expand Down Expand Up @@ -487,6 +484,30 @@ def best_int(max_value: int) -> type[signedinteger[Any]]:
raise ValueError(f"Value {max_value} is too large for int64.")


def fitting_label_dtype(max_value: int) -> type[signedinteger[Any]]:
    """
    Pick the integer dtype used to store labels up to ``max_value``.

    The configured ``options["label_dtype"]`` is a lower bound on the width:
    if ``max_value`` fits into it, the configured dtype is returned unchanged;
    otherwise the wider dtype reported by :func:`best_int` is returned so the
    labels are widened rather than overflowing.

    Parameters
    ----------
    max_value : int
        Largest label value that has to be representable. Negative values
        (e.g. an array holding only the ``-1`` fill value) resolve to the
        configured dtype.

    Returns
    -------
    type
        The configured label dtype, or the wider dtype needed for
        ``max_value``.
    """
    configured = options["label_dtype"]
    if max_value < 0:
        # Nothing to size against; the configured dtype is sufficient.
        required = configured
    else:
        required = best_int(max_value)

    # Only move away from the configured dtype when strictly more bytes are
    # needed; on equal width the configured dtype wins.
    if np.dtype(required).itemsize > np.dtype(configured).itemsize:
        return required
    return configured


def astype_labels(da: DataArray, fill_value: int = -1) -> DataArray:
    """
    Replace missing entries with ``fill_value`` and cast to a label dtype.

    The target dtype is chosen by :func:`fitting_label_dtype` from the largest
    value in the filled array, so labels are not truncated by the cast.

    Parameters
    ----------
    da : DataArray
        Labels array, possibly containing missing values.
    fill_value : int, default -1
        Value written in place of missing entries before casting.

    Returns
    -------
    DataArray
        The filled array converted to the selected integer dtype.
    """
    labels = da.fillna(fill_value)
    if labels.size:
        largest = int(labels.max())
    else:
        # An empty array has no maximum; size against zero instead.
        largest = 0
    target_dtype = fitting_label_dtype(largest)
    return labels.astype(target_dtype)


def get_index_map(*arrays: Sequence[Hashable]) -> dict[tuple, int]:
"""
Given arrays of hashable objects, create a map from unique combinations to unique integers.
Expand Down
9 changes: 9 additions & 0 deletions linopy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@

from typing import Any

import numpy as np

# Dtypes accepted for the ``label_dtype`` option; any other value is rejected
# with a ``ValueError`` when the option is set.
_VALID_LABEL_DTYPES = {np.int32, np.int64}


class OptionSettings:
"""Runtime configuration knobs (e.g. display widths). Use as a context manager or set values directly via ``options(key=value)``."""
Expand All @@ -30,6 +34,10 @@ def set_value(self, **kwargs: Any) -> None:
for k, v in kwargs.items():
if k not in self._defaults:
raise KeyError(f"{k} is not a valid setting.")
if k == "label_dtype" and v not in _VALID_LABEL_DTYPES:
raise ValueError(
f"label_dtype must be one of {_VALID_LABEL_DTYPES}, got {v}"
)
self._current_values[k] = v

def get_value(self, name: str) -> Any:
Expand Down Expand Up @@ -62,4 +70,5 @@ def __repr__(self) -> str:
# Shared settings object, created with the initial value of every option.
options = OptionSettings(
display_max_rows=14,
display_max_terms=6,
# Minimum width of integer labels; must be one of ``_VALID_LABEL_DTYPES``.
label_dtype=np.int32,
)
5 changes: 4 additions & 1 deletion linopy/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -2181,7 +2181,10 @@ def flat(self) -> pd.DataFrame:
return pd.DataFrame(columns=["coeffs", "vars", "labels", "key"])
df = pd.concat(dfs, ignore_index=True)
unique_labels = df.labels.unique()
map_labels = pd.Series(np.arange(len(unique_labels)), index=unique_labels)
map_labels = pd.Series(
np.arange(len(unique_labels), dtype=options["label_dtype"]),
index=unique_labels,
)
df["key"] = df.labels.map(map_labels)
return df

Expand Down
9 changes: 5 additions & 4 deletions linopy/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
EmptyDeprecationWrapper,
LocIndexer,
assign_multiindex_safe,
astype_labels,
check_common_keys_values,
check_has_nulls,
check_has_nulls_polars,
Expand Down Expand Up @@ -451,7 +452,7 @@ def __init__(self, data: Dataset | Any | None, model: Model) -> None:
)

if np.issubdtype(data.vars, np.floating):
data = assign_multiindex_safe(data, vars=data.vars.fillna(-1).astype(int))
data = assign_multiindex_safe(data, vars=astype_labels(data.vars))
if not np.issubdtype(data.coeffs, np.floating):
data["coeffs"].values = data.coeffs.values.astype(float)

Expand Down Expand Up @@ -1535,7 +1536,7 @@ def sanitize(self) -> Self:
linopy.LinearExpression
"""
if not np.issubdtype(self.vars.dtype, np.integer):
return self.assign(vars=self.vars.fillna(-1).astype(int))
return self.assign(vars=astype_labels(self.vars))

return self

Expand Down Expand Up @@ -1939,12 +1940,12 @@ def _simplify_row(vars_row: np.ndarray, coeffs_row: np.ndarray) -> np.ndarray:
# Combined has dimensions (.., CV_DIM, TERM_DIM)

# Drop terms where all vars are -1 (i.e., empty terms across all coordinates)
vars = combined.isel({CV_DIM: 0}).astype(int)
vars = astype_labels(combined.isel({CV_DIM: 0}))
non_empty_terms = (vars != -1).any(dim=[d for d in vars.dims if d != TERM_DIM])
combined = combined.isel({TERM_DIM: non_empty_terms})

# Extract vars and coeffs from the combined result
vars = combined.isel({CV_DIM: 0}).astype(int)
vars = astype_labels(combined.isel({CV_DIM: 0}))
coeffs = combined.isel({CV_DIM: 1})

# Create new dataset with simplified data
Expand Down
9 changes: 7 additions & 2 deletions linopy/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from linopy.common import (
assign_multiindex_safe,
best_int,
fitting_label_dtype,
maybe_replace_signs,
replace_by_map,
to_path,
Expand Down Expand Up @@ -824,7 +825,9 @@ def add_variables(

start = self._xCounter
end = start + data.labels.size
data.labels.values = np.arange(start, end).reshape(data.labels.shape)
data.labels.values = np.arange(
start, end, dtype=fitting_label_dtype(end)
).reshape(data.labels.shape)
self._xCounter += data.labels.size

if mask is not None:
Expand Down Expand Up @@ -969,7 +972,9 @@ def _allocate_constraint_labels(
"""Assign label ranges from the constraint counter and apply an optional mask."""
start = self._cCounter
end = start + data.labels.size
data.labels.values = np.arange(start, end).reshape(data.labels.shape)
data.labels.values = np.arange(
start, end, dtype=fitting_label_dtype(end)
).reshape(data.labels.shape)
self._cCounter += data.labels.size
if mask is not None:
data.labels.values = np.where(mask.values, data.labels.values, -1)
Expand Down
14 changes: 10 additions & 4 deletions linopy/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
LocIndexer,
VariableLabelIndex,
assign_multiindex_safe,
astype_labels,
check_has_nulls,
check_has_nulls_polars,
filter_nulls_polars,
Expand Down Expand Up @@ -1265,14 +1266,15 @@ def ffill(self, dim: str, limit: None = None) -> Variable:
-------
linopy.Variable
"""
label_dtype = self.labels.dtype
data = (
self.data.where(self.labels != -1)
# .ffill(dim, limit=limit)
# breaks with Dataset.ffill, use map instead
.map(DataArray.ffill, dim=dim, limit=limit)
.fillna(self._fill_value)
)
return self.assign_multiindex_safe(labels=data.labels.astype(int))
return self.assign_multiindex_safe(labels=data.labels.astype(label_dtype))

def bfill(self, dim: str, limit: None = None) -> Variable:
"""
Expand All @@ -1292,14 +1294,15 @@ def bfill(self, dim: str, limit: None = None) -> Variable:
-------
linopy.Variable
"""
label_dtype = self.labels.dtype
data = (
self.data.where(~self.isnull())
# .bfill(dim, limit=limit)
# breaks with Dataset.bfill, use map instead
.map(DataArray.bfill, dim=dim, limit=limit)
.fillna(self._fill_value)
)
return self.assign(labels=data.labels.astype(int))
return self.assign(labels=data.labels.astype(label_dtype))

def sanitize(self) -> Variable:
"""
Expand All @@ -1310,7 +1313,7 @@ def sanitize(self) -> Variable:
linopy.Variable
"""
if issubdtype(self.labels.dtype, floating):
return self.assign(labels=self.labels.fillna(-1).astype(int))
return self.assign(labels=astype_labels(self.labels))
return self

def equals(self, other: Variable) -> bool:
Expand Down Expand Up @@ -2032,7 +2035,10 @@ def flat(self) -> pd.DataFrame:
"""
df = pd.concat([self[k].flat for k in self], ignore_index=True)
unique_labels = df.labels.unique()
map_labels = pd.Series(np.arange(len(unique_labels)), index=unique_labels)
map_labels = pd.Series(
np.arange(len(unique_labels), dtype=options["label_dtype"]),
index=unique_labels,
)
df["key"] = df.labels.map(map_labels)
return df

Expand Down
102 changes: 102 additions & 0 deletions test/test_dtypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
"""Tests for int32 default label dtype."""

import numpy as np
import pytest

from linopy import Model
from linopy.config import options


def test_default_label_dtype_is_int32() -> None:
    """The ``label_dtype`` option is ``np.int32`` unless changed."""
    configured = options["label_dtype"]
    assert configured == np.int32


def test_variable_labels_are_int32() -> None:
    """Labels of a freshly added variable are stored as int32."""
    model = Model()
    var = model.add_variables(name="x", coords=[range(5)], lower=0, upper=10)
    label_dtype = var.labels.dtype
    assert label_dtype == np.int32


def test_constraint_labels_are_int32() -> None:
    """Labels of a freshly added constraint are stored as int32."""
    model = Model()
    var = model.add_variables(name="x", coords=[range(5)], lower=0, upper=10)
    model.add_constraints(var >= 1, name="c")
    constraint = model.constraints["c"]
    assert constraint.labels.dtype == np.int32


def test_expression_vars_are_int32() -> None:
    """The ``vars`` array of an expression built from a variable is int32."""
    model = Model()
    var = model.add_variables(name="x", coords=[range(5)], lower=0, upper=10)
    expression = 2 * var + 1
    vars_dtype = expression.vars.dtype
    assert vars_dtype == np.int32


def test_solve_with_int32_labels() -> None:
    """A small LP with default (int32) labels solves to the expected optimum."""
    # Skip only this test when HiGHS is unavailable. Calling ``importorskip``
    # inside decorator arguments runs it at import time, which skips the whole
    # module (including the solver-independent dtype tests) when highspy is
    # missing, and yields an always-false ``skipif`` condition when it is not.
    pytest.importorskip("highspy", reason="highspy not installed")

    m = Model()
    x = m.add_variables(lower=0, upper=10, name="x")
    y = m.add_variables(lower=0, upper=10, name="y")
    m.add_constraints(x + y <= 15, name="c1")
    m.add_objective(x + 2 * y, sense="max")
    m.solve("highs")
    # Optimum: y at its upper bound (10), x takes the remaining 5 -> 5 + 2*10.
    assert m.objective.value == pytest.approx(25.0)


def test_variable_labels_widen_past_int32() -> None:
    """Variable labels switch to int64 once they exceed the int32 maximum."""
    int32_max = np.iinfo(np.int32).max
    model = Model()
    # Start the counter just below the ceiling so the new labels cross it.
    model._xCounter = int32_max - 1
    var = model.add_variables(name="x", coords=[range(5)], lower=0, upper=1)
    assert var.labels.dtype == np.int64
    assert int(var.labels.max()) > int32_max


def test_constraint_labels_widen_past_int32() -> None:
    """Constraint labels switch to int64 once they exceed the int32 maximum."""
    int32_max = np.iinfo(np.int32).max
    model = Model()
    var = model.add_variables(name="x", coords=[range(5)], lower=0, upper=1)
    # Start the counter just below the ceiling so the new labels cross it.
    model._cCounter = int32_max - 1
    model.add_constraints(var >= 0, name="c")
    con_labels = model.constraints["c"].labels
    assert con_labels.dtype == np.int64
    assert int(con_labels.max()) > int32_max


def test_fitting_label_dtype_floors_and_widens() -> None:
    """``fitting_label_dtype`` keeps int32 up to its maximum and widens above."""
    from linopy.common import fitting_label_dtype

    int32_max = np.iinfo(np.int32).max

    # Values up to and including the int32 maximum keep the configured default.
    small_dtype = fitting_label_dtype(100)
    assert small_dtype == np.int32
    boundary_dtype = fitting_label_dtype(int32_max)
    assert boundary_dtype == np.int32

    # One past the maximum must widen instead of truncating.
    wide_dtype = fitting_label_dtype(int32_max + 1)
    assert wide_dtype == np.int64


def test_astype_labels_preserves_values_past_int32() -> None:
    """
    ``astype_labels`` must keep label values above the int32 maximum intact.

    The cast-back paths (ffill / sanitize / save_join / ...) must not truncate
    such labels down to the int32 default.
    """
    from xarray import DataArray

    from linopy.common import astype_labels

    first = np.iinfo(np.int32).max + 10
    second = first + 1
    # Mimic the float round-trip those paths see: labels as floats with NaN
    # for missing entries, which the helper fills with -1.
    raw = np.array([first, second, np.nan], dtype=float)
    result = astype_labels(DataArray(raw))
    assert result.dtype == np.int64
    np.testing.assert_array_equal(result.values, [first, second, -1])


def test_label_dtype_option_int64() -> None:
    """With ``label_dtype`` set to int64, labels and expression vars are int64."""
    with options:
        options["label_dtype"] = np.int64
        model = Model()
        var = model.add_variables(name="x", coords=[range(5)], lower=0, upper=10)
        assert var.labels.dtype == np.int64
        expression = 2 * var + 1
        assert expression.vars.dtype == np.int64


def test_label_dtype_rejects_invalid() -> None:
    """Assigning a non-integer dtype to ``label_dtype`` raises ``ValueError``."""
    expected_message = "label_dtype must be one of"
    with pytest.raises(ValueError, match=expected_message):
        options["label_dtype"] = np.float64