Skip to content

Commit 914580f

Browse files
FBumann and claude
committed
perf: Use numpy reshape in _build_typical_das (4.4x faster)
Eliminated 451,856 slow pandas .loc calls by using numpy reshape for segmented clustering data instead of iterating per-cluster. cluster() with segments benchmark (50 clusters, 4 segments): - Before: ~93.7s - After: ~21.1s - Speedup: 4.4x Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent b215959 commit 914580f

1 file changed

Lines changed: 13 additions & 8 deletions

File tree

flixopt/transform_accessor.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -195,15 +195,20 @@ def _build_typical_das(
195195
for key, tsam_result in tsam_aggregation_results.items():
196196
typical_df = tsam_result.cluster_representatives
197197
if is_segmented:
198-
# Segmented data: MultiIndex (Segment Step, Segment Duration)
199-
# Need to extract by cluster (first level of index)
200-
for col in typical_df.columns:
201-
data = np.zeros((actual_n_clusters, n_time_points))
202-
for cluster_id in range(actual_n_clusters):
203-
cluster_data = typical_df.loc[cluster_id, col]
204-
data[cluster_id, :] = cluster_data.values[:n_time_points]
198+
# Segmented data: MultiIndex with cluster as first level
199+
# Each cluster has exactly n_time_points rows (segments)
200+
# Extract all data at once using numpy reshape, avoiding slow .loc calls
201+
columns = typical_df.columns.tolist()
202+
203+
# Get all values as numpy array: (n_clusters * n_time_points, n_columns)
204+
all_values = typical_df.values
205+
206+
# Reshape to (n_clusters, n_time_points, n_columns)
207+
reshaped = all_values.reshape(actual_n_clusters, n_time_points, -1)
208+
209+
for col_idx, col in enumerate(columns):
205210
typical_das.setdefault(col, {})[key] = xr.DataArray(
206-
data,
211+
reshaped[:, :, col_idx],
207212
dims=['cluster', 'time'],
208213
coords={'cluster': cluster_coords, 'time': time_coords},
209214
)

0 commit comments

Comments (0)