fluxopt · FBumann · Feb 1, 2026 · Feb 1, 2026 · Feb 1, 2026 · Feb 2, 2026
diff --git a/doc/release_notes.rst b/doc/release_notes.rst
@@ -19,6 +19,7 @@ Upcoming Version
 
 *Other*
 
+* Default internal integer labels to ``int32`` (configurable via ``linopy.options["label_dtype"]``, set to ``np.int64`` for the old behavior), cutting memory ~25% and speeding up model build 10-35%. Models exceeding the int32 maximum automatically widen their labels to a larger int dtype instead of raising; ``label_dtype`` acts as a floor.
 * ``add_variables(binary=True, ...)`` now accepts ``lower``/``upper`` bounds, as long as they are 0 or 1. Previously binary bounds could only be set via the ``.lower``/``.upper`` setters after creation. (https://github.com/PyPSA/linopy/issues/776)
 * ``add_piecewise_formulation`` gained an ``active_fill`` parameter that gates a partial ``active`` (defined over a subset of the indexed dimension, or masked) as always-active (``1``) or always-off (``0``); without it, a partial ``active`` — which was previously zeroed silently — now raises. Useful when one formulation mixes gated and ungated entities (e.g. committable and non-committable units sharing a ``status``). ``active_fill`` is transitional and will be removed once v1 semantics make ``active.reindex(coords).fillna(value)`` sufficient. (https://github.com/PyPSA/linopy/issues/796)
 

diff --git a/linopy/common.py b/linopy/common.py
@@ -8,7 +8,6 @@
 from __future__ import annotations
 
 import operator
-import os
 from collections.abc import Callable, Generator, Hashable, Iterable, Sequence
 from functools import cached_property, reduce, wraps
 from pathlib import Path
@@ -159,12 +158,10 @@ def infer_schema_polars(ds: Dataset) -> dict[str, DataTypeClass]:
         dict: A dictionary mapping column names to their corresponding Polars data types.
     """
     schema: dict[str, DataTypeClass] = {}
-    np_major_version = int(np.__version__.split(".")[0])
-    use_int32 = os.name == "nt" and np_major_version < 2
     for name, array in ds.items():
         name = str(name)
         if np.issubdtype(array.dtype, np.integer):
-            schema[name] = pl.Int32 if use_int32 else pl.Int64
+            schema[name] = pl.Int32 if array.dtype.itemsize <= 4 else pl.Int64
         elif np.issubdtype(array.dtype, np.floating):
             schema[name] = pl.Float64
         elif np.issubdtype(array.dtype, np.bool_):
@@ -308,7 +305,7 @@ def save_join(*dataarrays: DataArray, integer_dtype: bool = False) -> Dataset:
         )
         arrs = xr_align(*dataarrays, join="outer")
         if integer_dtype:
-            arrs = tuple([ds.fillna(-1).astype(int) for ds in arrs])
+            arrs = tuple([astype_labels(ds) for ds in arrs])
     return Dataset({ds.name: ds for ds in arrs})
 
 
@@ -487,6 +484,30 @@ def best_int(max_value: int) -> type[signedinteger[Any]]:
     raise ValueError(f"Value {max_value} is too large for int64.")
 
 
+def fitting_label_dtype(max_value: int) -> type[signedinteger[Any]]:
+    """
+    Return the integer dtype to use for labels reaching up to ``max_value``.
+
+    The result is the wider of ``options["label_dtype"]`` and the dtype that
+    ``best_int`` picks for ``max_value``, so the configured dtype is a lower
+    bound. A negative ``max_value`` yields the configured dtype unchanged.
+    """
+    configured = options["label_dtype"]
+    if max_value < 0:
+        return configured
+    return max(configured, best_int(max_value), key=lambda t: np.dtype(t).itemsize)
+
+
+def astype_labels(da: DataArray, fill_value: int = -1) -> DataArray:
+    """
+    Replace missing entries with ``fill_value`` and cast the labels to the
+    integer dtype that :func:`fitting_label_dtype` selects for their maximum.
+
+    An empty array has no maximum, so ``0`` is used to pick the dtype.
+    """
+    filled = da.fillna(fill_value)
+    if filled.size:
+        largest = int(filled.max())
+    else:
+        largest = 0
+    target = fitting_label_dtype(largest)
+    return filled.astype(target)
+
+
 def get_index_map(*arrays: Sequence[Hashable]) -> dict[tuple, int]:
     """
     Given arrays of hashable objects, create a map from unique combinations to unique integers.

diff --git a/linopy/config.py b/linopy/config.py
@@ -9,6 +9,10 @@
 
 from typing import Any
 
+import numpy as np
+
+# Dtypes accepted for the ``label_dtype`` option; ``set_value`` raises
+# ``ValueError`` for any other value.
+_VALID_LABEL_DTYPES = {np.int32, np.int64}
+
 
 class OptionSettings:
     """Runtime configuration knobs (e.g. display widths). Use as a context manager or set values directly via ``options(key=value)``."""
@@ -30,6 +34,10 @@ def set_value(self, **kwargs: Any) -> None:
         for k, v in kwargs.items():
             if k not in self._defaults:
                 raise KeyError(f"{k} is not a valid setting.")
+            if k == "label_dtype" and v not in _VALID_LABEL_DTYPES:
+                raise ValueError(
+                    f"label_dtype must be one of {_VALID_LABEL_DTYPES}, got {v}"
+                )
             self._current_values[k] = v
 
     def get_value(self, name: str) -> Any:
@@ -62,4 +70,5 @@ def __repr__(self) -> str:
 options = OptionSettings(
     display_max_rows=14,
     display_max_terms=6,
+    # Default dtype for integer labels; set to ``np.int64`` for the old behavior.
+    label_dtype=np.int32,
 )
diff --git a/linopy/constraints.py b/linopy/constraints.py
@@ -2181,7 +2181,10 @@ def flat(self) -> pd.DataFrame:
             return pd.DataFrame(columns=["coeffs", "vars", "labels", "key"])
         df = pd.concat(dfs, ignore_index=True)
         unique_labels = df.labels.unique()
-        map_labels = pd.Series(np.arange(len(unique_labels)), index=unique_labels)
+        map_labels = pd.Series(
+            np.arange(len(unique_labels), dtype=options["label_dtype"]),
+            index=unique_labels,
+        )
         df["key"] = df.labels.map(map_labels)
         return df
 

diff --git a/linopy/expressions.py b/linopy/expressions.py
@@ -49,6 +49,7 @@
     EmptyDeprecationWrapper,
     LocIndexer,
     assign_multiindex_safe,
+    astype_labels,
     check_common_keys_values,
     check_has_nulls,
     check_has_nulls_polars,
@@ -451,7 +452,7 @@ def __init__(self, data: Dataset | Any | None, model: Model) -> None:
             )
 
         if np.issubdtype(data.vars, np.floating):
-            data = assign_multiindex_safe(data, vars=data.vars.fillna(-1).astype(int))
+            data = assign_multiindex_safe(data, vars=astype_labels(data.vars))
         if not np.issubdtype(data.coeffs, np.floating):
             data["coeffs"].values = data.coeffs.values.astype(float)
 
@@ -1535,7 +1536,7 @@ def sanitize(self) -> Self:
         linopy.LinearExpression
         """
         if not np.issubdtype(self.vars.dtype, np.integer):
-            return self.assign(vars=self.vars.fillna(-1).astype(int))
+            return self.assign(vars=astype_labels(self.vars))
 
         return self
 
@@ -1939,12 +1940,12 @@ def _simplify_row(vars_row: np.ndarray, coeffs_row: np.ndarray) -> np.ndarray:
         # Combined has dimensions (.., CV_DIM, TERM_DIM)
 
         # Drop terms where all vars are -1 (i.e., empty terms across all coordinates)
-        vars = combined.isel({CV_DIM: 0}).astype(int)
+        vars = astype_labels(combined.isel({CV_DIM: 0}))
         non_empty_terms = (vars != -1).any(dim=[d for d in vars.dims if d != TERM_DIM])
         combined = combined.isel({TERM_DIM: non_empty_terms})
 
         # Extract vars and coeffs from the combined result
-        vars = combined.isel({CV_DIM: 0}).astype(int)
+        vars = astype_labels(combined.isel({CV_DIM: 0}))
         coeffs = combined.isel({CV_DIM: 1})
 
         # Create new dataset with simplified data

diff --git a/linopy/model.py b/linopy/model.py
@@ -31,6 +31,7 @@
 from linopy.common import (
     assign_multiindex_safe,
     best_int,
+    fitting_label_dtype,
     maybe_replace_signs,
     replace_by_map,
     to_path,
@@ -824,7 +825,9 @@ def add_variables(
 
         start = self._xCounter
         end = start + data.labels.size
-        data.labels.values = np.arange(start, end).reshape(data.labels.shape)
+        data.labels.values = np.arange(
+            start, end, dtype=fitting_label_dtype(end)
+        ).reshape(data.labels.shape)
         self._xCounter += data.labels.size
 
         if mask is not None:
@@ -969,7 +972,9 @@ def _allocate_constraint_labels(
         """Assign label ranges from the constraint counter and apply an optional mask."""
         start = self._cCounter
         end = start + data.labels.size
-        data.labels.values = np.arange(start, end).reshape(data.labels.shape)
+        data.labels.values = np.arange(
+            start, end, dtype=fitting_label_dtype(end)
+        ).reshape(data.labels.shape)
         self._cCounter += data.labels.size
         if mask is not None:
             data.labels.values = np.where(mask.values, data.labels.values, -1)

diff --git a/linopy/variables.py b/linopy/variables.py
@@ -37,6 +37,7 @@
     LocIndexer,
     VariableLabelIndex,
     assign_multiindex_safe,
+    astype_labels,
     check_has_nulls,
     check_has_nulls_polars,
     filter_nulls_polars,
@@ -1265,14 +1266,15 @@ def ffill(self, dim: str, limit: None = None) -> Variable:
         -------
         linopy.Variable
         """
+        label_dtype = self.labels.dtype
         data = (
             self.data.where(self.labels != -1)
             # .ffill(dim, limit=limit)
             # breaks with Dataset.ffill, use map instead
             .map(DataArray.ffill, dim=dim, limit=limit)
             .fillna(self._fill_value)
         )
-        return self.assign_multiindex_safe(labels=data.labels.astype(int))
+        return self.assign_multiindex_safe(labels=data.labels.astype(label_dtype))
 
     def bfill(self, dim: str, limit: None = None) -> Variable:
         """
@@ -1292,14 +1294,15 @@ def bfill(self, dim: str, limit: None = None) -> Variable:
         -------
         linopy.Variable
         """
+        label_dtype = self.labels.dtype
         data = (
             self.data.where(~self.isnull())
             # .bfill(dim, limit=limit)
             # breaks with Dataset.bfill, use map instead
             .map(DataArray.bfill, dim=dim, limit=limit)
             .fillna(self._fill_value)
         )
-        return self.assign(labels=data.labels.astype(int))
+        return self.assign(labels=data.labels.astype(label_dtype))
 
     def sanitize(self) -> Variable:
         """
@@ -1310,7 +1313,7 @@ def sanitize(self) -> Variable:
         linopy.Variable
         """
         if issubdtype(self.labels.dtype, floating):
-            return self.assign(labels=self.labels.fillna(-1).astype(int))
+            return self.assign(labels=astype_labels(self.labels))
         return self
 
     def equals(self, other: Variable) -> bool:
@@ -2032,7 +2035,10 @@ def flat(self) -> pd.DataFrame:
         """
         df = pd.concat([self[k].flat for k in self], ignore_index=True)
         unique_labels = df.labels.unique()
-        map_labels = pd.Series(np.arange(len(unique_labels)), index=unique_labels)
+        map_labels = pd.Series(
+            np.arange(len(unique_labels), dtype=options["label_dtype"]),
+            index=unique_labels,
+        )
         df["key"] = df.labels.map(map_labels)
         return df
 

diff --git a/test/test_dtypes.py b/test/test_dtypes.py
@@ -0,0 +1,102 @@
+"""Tests for int32 default label dtype."""
+
+import numpy as np
+import pytest
+
+from linopy import Model
+from linopy.config import options
+
+
+def test_default_label_dtype_is_int32() -> None:
+    """The ``label_dtype`` option is ``np.int32`` by default."""
+    default = options["label_dtype"]
+    assert default == np.int32
+
+
+def test_variable_labels_are_int32() -> None:
+    """Labels of a newly added variable have dtype int32."""
+    model = Model()
+    var = model.add_variables(lower=0, upper=10, coords=[range(5)], name="x")
+    assert var.labels.dtype == np.int32
+
+
+def test_constraint_labels_are_int32() -> None:
+    """Labels of a newly added constraint have dtype int32."""
+    model = Model()
+    var = model.add_variables(lower=0, upper=10, coords=[range(5)], name="x")
+    model.add_constraints(var >= 1, name="c")
+    con_labels = model.constraints["c"].labels
+    assert con_labels.dtype == np.int32
+
+
+def test_expression_vars_are_int32() -> None:
+    """The ``vars`` of an expression built from a variable have dtype int32."""
+    model = Model()
+    var = model.add_variables(lower=0, upper=10, coords=[range(5)], name="x")
+    affine = 2 * var + 1
+    assert affine.vars.dtype == np.int32
+
+
+def test_solve_with_int32_labels() -> None:
+    """Solve a small LP with HiGHS and check the optimal objective value."""
+    # Skip from inside the test body: calling ``importorskip`` in a decorator
+    # argument runs at import time and would skip this whole module when
+    # highspy is missing, instead of only this test.
+    pytest.importorskip("highspy", reason="highspy not installed")
+    m = Model()
+    x = m.add_variables(lower=0, upper=10, name="x")
+    y = m.add_variables(lower=0, upper=10, name="y")
+    m.add_constraints(x + y <= 15, name="c1")
+    m.add_objective(x + 2 * y, sense="max")
+    m.solve("highs")
+    # Optimum is y = 10, x = 5, giving 5 + 2 * 10 = 25.
+    assert m.objective.value == pytest.approx(25.0)
+
+
+def test_variable_labels_widen_past_int32() -> None:
+    """Variable labels become int64 once they pass the int32 maximum."""
+    int32_max = np.iinfo(np.int32).max
+    model = Model()
+    # Put the variable counter just below the int32 limit so the five new
+    # labels cross it.
+    model._xCounter = int32_max - 1
+    var = model.add_variables(lower=0, upper=1, coords=[range(5)], name="x")
+    assert var.labels.dtype == np.int64
+    assert int(var.labels.max()) > int32_max
+
+
+def test_constraint_labels_widen_past_int32() -> None:
+    """Constraint labels become int64 once they pass the int32 maximum."""
+    int32_max = np.iinfo(np.int32).max
+    model = Model()
+    var = model.add_variables(lower=0, upper=1, coords=[range(5)], name="x")
+    # Put the constraint counter just below the int32 limit so the five new
+    # labels cross it.
+    model._cCounter = int32_max - 1
+    model.add_constraints(var >= 0, name="c")
+    con_labels = model.constraints["c"].labels
+    assert con_labels.dtype == np.int64
+    assert int(con_labels.max()) > int32_max
+
+
+def test_fitting_label_dtype_floors_and_widens() -> None:
+    """Int32 is kept up to its maximum; one past it gives int64."""
+    from linopy.common import fitting_label_dtype
+
+    int32_max = np.iinfo(np.int32).max
+    # values that fit int32 stay at the configured default
+    for fits in (100, int32_max):
+        assert fitting_label_dtype(fits) == np.int32
+    # one past the maximum needs the wider dtype
+    assert fitting_label_dtype(int32_max + 1) == np.int64
+
+
+def test_astype_labels_preserves_values_past_int32() -> None:
+    """Float labels above the int32 maximum keep their values after the cast."""
+    from xarray import DataArray
+
+    from linopy.common import astype_labels
+
+    # The label cast-back paths (ffill / sanitize / save_join / ...) see labels
+    # as floats with NaN for missing entries; mimic that input here.
+    base = np.iinfo(np.int32).max + 10
+    float_labels = DataArray(np.array([base, base + 1, np.nan], dtype=float))
+    result = astype_labels(float_labels)
+    assert result.dtype == np.int64
+    np.testing.assert_array_equal(result.values, [base, base + 1, -1])
+
+
+def test_label_dtype_option_int64() -> None:
+    """With ``label_dtype`` set to int64, labels and expression vars are int64."""
+    with options:
+        options["label_dtype"] = np.int64
+        model = Model()
+        var = model.add_variables(lower=0, upper=10, coords=[range(5)], name="x")
+        assert var.labels.dtype == np.int64
+        affine = 2 * var + 1
+        assert affine.vars.dtype == np.int64
+
+
+def test_label_dtype_rejects_invalid() -> None:
+    """Assigning ``np.float64`` to ``label_dtype`` raises ``ValueError``."""
+    expected_message = "label_dtype must be one of"
+    with pytest.raises(ValueError, match=expected_message):
+        options["label_dtype"] = np.float64
-Original file line number
+Diff line change
@@ Expand Up / @@ -19,6 +19,7 @@ Upcoming Version @@
     *Other*
+    * Default internal integer labels to ``int32`` (configurable via ``linopy.options["label_dtype"]``, set to ``np.int64`` for the old behavior), cutting memory ~25% and speeding up model build 10-35%. Models exceeding the int32 maximum automatically widen their labels to a larger int dtype instead of raising; ``label_dtype`` acts as a floor.
     * ``add_variables(binary=True, ...)`` now accepts ``lower``/``upper`` bounds, as long as they are 0 or 1. Previously binary bounds could only be set via the ``.lower``/``.upper`` setters after creation. (https://github.com/PyPSA/linopy/issues/776)
     * ``add_piecewise_formulation`` gained an ``active_fill`` parameter that gates a partial ``active`` (defined over a subset of the indexed dimension, or masked) as always-active (``1``) or always-off (``0``); without it, a partial ``active`` — which was previously zeroed silently — now raises. Useful when one formulation mixes gated and ungated entities (e.g. committable and non-committable units sharing a ``status``). ``active_fill`` is transitional and will be removed once v1 semantics make ``active.reindex(coords).fillna(value)`` sufficient. (https://github.com/PyPSA/linopy/issues/796)
@@ Expand Down @@