Skip to content

Commit 914580f

Browse files
FBumann and claude
committed
perf: Use numpy reshape in _build_typical_das (4.4x faster)
Eliminated 451,856 slow pandas .loc calls by using numpy reshape for segmented clustering data instead of iterating per-cluster. cluster() with segments benchmark (50 clusters, 4 segments): - Before: ~93.7s - After: ~21.1s - Speedup: 4.4x Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent b215959 commit 914580f

1 file changed

Lines changed: 13 additions & 8 deletions

File tree

flixopt/transform_accessor.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -195,15 +195,20 @@ def _build_typical_das(
195195
for key, tsam_result in tsam_aggregation_results.items():
196196
typical_df = tsam_result.cluster_representatives
197197
if is_segmented:
198-
# Segmented data: MultiIndex (Segment Step, Segment Duration)
199-
# Need to extract by cluster (first level of index)
200-
for col in typical_df.columns:
201-
data = np.zeros((actual_n_clusters, n_time_points))
202-
for cluster_id in range(actual_n_clusters):
203-
cluster_data = typical_df.loc[cluster_id, col]
204-
data[cluster_id, :] = cluster_data.values[:n_time_points]
198+
# Segmented data: MultiIndex with cluster as first level
199+
# Each cluster has exactly n_time_points rows (segments)
200+
# Extract all data at once using numpy reshape, avoiding slow .loc calls
201+
columns = typical_df.columns.tolist()
202+
203+
# Get all values as numpy array: (n_clusters * n_time_points, n_columns)
204+
all_values = typical_df.values
205+
206+
# Reshape to (n_clusters, n_time_points, n_columns)
207+
reshaped = all_values.reshape(actual_n_clusters, n_time_points, -1)
208+
209+
for col_idx, col in enumerate(columns):
205210
typical_das.setdefault(col, {})[key] = xr.DataArray(
206-
data,
211+
reshaped[:, :, col_idx],
207212
dims=['cluster', 'time'],
208213
coords={'cluster': cluster_coords, 'time': time_coords},
209214
)

0 commit comments

Comments (0)