From ef60de3a3c0ab540d30ddc032170191e9be51411 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Fri, 22 May 2026 15:24:09 +0100 Subject: [PATCH 01/18] Refactor callbacks --- ml_peg/app/base_app.py | 39 ++++- ml_peg/app/build_app.py | 45 +++-- ml_peg/app/utils/build_components.py | 117 +++++-------- ml_peg/app/utils/register_callbacks.py | 217 ++++++++++++++++++------- 4 files changed, 269 insertions(+), 149 deletions(-) diff --git a/ml_peg/app/base_app.py b/ml_peg/app/base_app.py index 1fc1ecfce..5f9f0691e 100644 --- a/ml_peg/app/base_app.py +++ b/ml_peg/app/base_app.py @@ -5,6 +5,7 @@ from abc import ABC, abstractmethod from pathlib import Path +from dash.dcc import Store from dash.development.base_component import Component from dash.html import Div @@ -31,7 +32,7 @@ class BaseApp(ABC): URL for online documentation. Default is None. framework_id Framework identifier used for benchmark attribution tags. Default is - ``"ml_peg"``. + `"ml_peg"`. """ def __init__( @@ -60,6 +61,7 @@ def __init__( URL to online documentation. Default is None. framework_id Framework identifier used for benchmark attribution tags. + Default is `"ml_peg"`. """ self.name = name self.description = description @@ -91,7 +93,7 @@ def build_layout(self) -> Div: framework_id=self.framework_id, table=self.table, column_widths=getattr(self.table, "column_widths", None), - thresholds=getattr(self.table, "thresholds", None), + thresholds=self.table.thresholds, extra_components=self.extra_components, ) @@ -99,3 +101,36 @@ def build_layout(self) -> Div: def register_callbacks(self): """Register callbacks with app.""" pass + + @property + def stores(self) -> list[Store]: + """ + List Stores to be registered with full app. + + Returns + ------- + list[Store] + List of Stores to be registered with full app. + """ + return [ + Store( + id=f"{self.table_id}-computed-store", + storage_type="session", + data=self.table.data, + ), + Store( + id=f"{self.table_id}-raw-data-store", + storage_type="session", + data=self.table.data, + ), + Store( + id=f"{self.table_id}-weight-store", + storage_type="session", + data=self.table.weights, + ), + Store( + id=f"{self.table_id}-thresholds-store", + storage_type="session", + data=self.table.thresholds, + ), + ] diff --git a/ml_peg/app/build_app.py b/ml_peg/app/build_app.py index dc109c6a1..9858f74e5 100644 --- a/ml_peg/app/build_app.py +++ b/ml_peg/app/build_app.py @@ -25,7 +25,10 @@ build_tutorial_button, register_onboarding_callbacks, ) -from ml_peg.app.utils.register_callbacks import register_benchmark_to_category_callback +from ml_peg.app.utils.register_callbacks import ( + register_benchmark_to_category_callback, + register_filter_tables_callback, +) from ml_peg.app.utils.utils import ( build_level_of_theory_warnings, get_framework_config, @@ -342,6 +345,7 @@ def build_sidebar( def get_all_tests( category: str = "*", ) -> tuple[ + dict[str, dict[str, Dash]], dict[str, dict[str, list[Div]]], dict[str, dict[str, DataTable]], dict[str, dict[str, str]], @@ -357,12 +361,13 @@ def get_all_tests( Returns ------- tuple - Layouts, tables, and framework IDs for all categories. + Apps by test name, and layouts, tables, and framework IDs for all categories. """ # Find Python files e.g. app_OC157.py in mlip_tesing.app module. # We will get the category from the parent's parent directory # E.g. ml_peg/app/surfaces/OC157/app_OC157.py -> surfaces tests = APP_ROOT.glob(f"{category}/*/app*.py") + apps = {} layouts = {} tables = {} frameworks = {} @@ -377,15 +382,18 @@ def get_all_tests( f"ml_peg.app.{category_name}.{test_name}.app_{test_name}" ) test_app = test_module.get_app() + apps[test_name] = test_app # Get layouts and tables for each category/test if category_name not in layouts: layouts[category_name] = {} tables[category_name] = {} frameworks[category_name] = {} + layouts[category_name][test_app.name] = test_app.layout tables[category_name][test_app.name] = test_app.table frameworks[category_name][test_app.name] = test_app.framework_id + except FileNotFoundError as err: warnings.warn( f"Unable to load layout for {test_name} in {category_name} category. " @@ -405,7 +413,7 @@ def get_all_tests( ) continue - return layouts, tables, frameworks + return apps, layouts, tables, frameworks def build_category( @@ -439,6 +447,7 @@ def build_category( category_views = {} category_tables = {} category_weights = {} + category_to_title = {} framework_ids: set[str] = set() # `category` corresponds to the category's directory name @@ -458,6 +467,8 @@ def build_category( category_weight = 1 benchmark_weights = {} + category_to_title[category] = category_title + # Build category summary table summary_table = build_summary_table( dict(sorted(all_tables[category].items())), @@ -475,7 +486,6 @@ def build_category( weight_components = build_weight_components( header="Weights", table=summary_table, - include_store=False, include_download_controls=False, column_widths=getattr(summary_table, "column_widths", None), ) @@ -500,15 +510,9 @@ def build_category( "tests": test_entries, } - # Register benchmark table -> category table callbacks - # Category summary table columns add "Score" to name for clarity - for test_name, benchmark_table in all_tables[category].items(): - register_benchmark_to_category_callback( - benchmark_table_id=benchmark_table.id, - category_table_id=f"{category_title}-summary-table", - benchmark_column=test_name + " Score", - model_name_map=getattr(benchmark_table, "model_name_map", None), - ) + # Register callback for all benchmark tables -> category table + # Category summary table columns add "Score" to name for clarity + register_benchmark_to_category_callback(all_tables, category_to_title) return category_views, category_tables, category_weights, framework_ids @@ -836,6 +840,7 @@ def build_nav( framework_views: dict[str, dict[str, object]], summary_table: DataTable, weight_components: Div, + all_apps: dict[str, Dash], ) -> None: """ Build page layouts and sidebar navigation. @@ -852,6 +857,8 @@ def build_nav( Summary table with score from each category. weight_components Weight sliders, text boxes and reset button. + all_apps + Dictionary of all test apps. """ category_paths = { category_name: _category_to_path(category_name) @@ -977,6 +984,11 @@ def build_nav( ), ] ) + + test_state_stores = [] + for app in all_apps.values(): + test_state_stores.extend(app.stores) + global_state_stores = [ Store( id="summary-table-weight-store", @@ -985,6 +997,7 @@ def build_nav( ), Store(id="cmap-store", storage_type="local", data="viridis_r"), *category_state_stores, + *test_state_stores, ] full_layout = [ @@ -1261,11 +1274,13 @@ def build_full_app(full_app: Dash, category: str = "*") -> None: Category to build app for. Default is `*`, corresponding to all categories. """ # Get layouts and tables for each test, grouped by categories - all_layouts, all_tables, all_frameworks = get_all_tests(category=category) + all_apps, all_layouts, all_tables, all_frameworks = get_all_tests(category=category) if not all_layouts: raise ValueError("No tests were built successfully") + register_filter_tables_callback(all_apps) + # Combine tests into categories and create category summary cat_views, cat_tables, cat_weights, framework_ids = build_category( all_layouts, all_tables, all_frameworks @@ -1278,7 +1293,6 @@ def build_full_app(full_app: Dash, category: str = "*") -> None: weight_components = build_weight_components( header="Weights", table=summary_table, - include_store=False, include_download_controls=False, column_widths=summary_table.column_widths, ) @@ -1289,5 +1303,6 @@ def build_full_app(full_app: Dash, category: str = "*") -> None: framework_views, summary_table, weight_components, + all_apps, ) register_onboarding_callbacks() diff --git a/ml_peg/app/utils/build_components.py b/ml_peg/app/utils/build_components.py index 050e57306..49f604d1b 100644 --- a/ml_peg/app/utils/build_components.py +++ b/ml_peg/app/utils/build_components.py @@ -139,7 +139,6 @@ def build_weight_components( *, use_thresholds: bool = False, include_download_controls: bool = True, - include_store: bool = True, column_widths: dict[str, int] | None = None, thresholds: Thresholds | None = None, ) -> Div: @@ -158,10 +157,6 @@ def build_weight_components( recompute Scores consistently. include_download_controls Whether to render download controls in the Score column slot. - include_store - Whether to include this table's weight ``dcc.Store`` in the returned - component. Set to ``False`` when that store is already created elsewhere, - for example in the main app layout. column_widths Optional mapping of table column IDs to pixel widths used to align the inputs with the rendered table. @@ -292,14 +287,6 @@ def build_weight_components( ) layout = [container] - if include_store: - layout.append( - Store( - id=f"{table.id}-weight-store", - storage_type="session", - data=weights, - ) - ) model_levels = getattr(table, "model_levels_of_theory", None) metric_levels = getattr(table, "metric_levels_of_theory", None) @@ -787,10 +774,10 @@ def build_test_layout( description: str, framework_id: str, table: DataTable, + thresholds: Thresholds, extra_components: list[Component] | None = None, docs_url: str | None = None, column_widths: dict[str, int] | None = None, - thresholds: Thresholds | None = None, ) -> Div: """ Build app layout for a test. @@ -806,6 +793,9 @@ def build_test_layout( table Dash Table with metric results. Can include a `weights` attribute to be used by `build_weight_components`. + thresholds + Normalization metadata (metric -> (good, bad, unit)) supplied via the + analysis pipeline. Inline threshold controls are rendered automatically. extra_components List of Dash Components to include after the metrics table. docs_url @@ -813,10 +803,6 @@ def build_test_layout( column_widths Optional column-width mapping inferred from analysis output. Used to align threshold controls beneath the table columns when available. - thresholds - Optional normalization metadata (metric -> (good, bad, unit)) supplied via the - analysis pipeline. When provided, inline threshold controls are rendered - automatically. Returns ------- @@ -875,33 +861,32 @@ def build_test_layout( ) ) - # Inline normalization thresholds when metadata is supplied - threshold_controls = None - if thresholds is not None: - reserved = {"MLIP", "Score", "id"} - metric_columns = [ - col["id"] for col in table.columns if col.get("id") not in reserved - ] - layout_contents.append( - Store( - id=f"{table.id}-raw-data-store", - storage_type="session", - data=table.data, - ) - ) - layout_contents.append( - Store( - id=f"{table.id}-raw-tooltip-store", - storage_type="session", - data=table.tooltip_header, - ) + reserved = {"MLIP", "Score", "id"} + metric_columns = [ + col["id"] for col in table.columns if col.get("id") not in reserved + ] + + layout_contents.append( + Store( + id=f"{table.id}-raw-data-store", + storage_type="session", + data=table.data, ) - threshold_controls = build_threshold_inputs( - table_columns=metric_columns, - thresholds=thresholds, - table_id=table.id, - column_widths=column_widths, + ) + layout_contents.append( + Store( + id=f"{table.id}-raw-tooltip-store", + storage_type="session", + data=table.tooltip_header, ) + ) + + threshold_controls = build_threshold_inputs( + table_columns=metric_columns, + thresholds=thresholds, + table_id=table.id, + column_widths=column_widths, + ) # Add metric-weight controls for every benchmark table metric_weights = build_weight_components( @@ -916,24 +901,21 @@ def build_test_layout( # Build the controls element before the table wrapper so both can go into the # same fit-content div. The controls use width:100% of that wrapper, which # equals the table width, keeping the columns aligned. - if thresholds is not None: - controls_visual = Div( - [ - Div(threshold_controls, style={"marginBottom": "0px"}), - Div(metric_weights, style={"marginTop": "0"}), - ], - style={ - "backgroundColor": "#f8f9fa", - "border": "1px solid #dee2e6", - "borderRadius": "6px", - "padding": "0px 0px 0px 0px", # top right bottom left - "marginTop": "-5px", - "boxSizing": "border-box", - "width": "100%", - }, - ) - else: - controls_visual = metric_weights + controls_visual = Div( + [ + Div(threshold_controls, style={"marginBottom": "0px"}), + Div(metric_weights, style={"marginTop": "0"}), + ], + style={ + "backgroundColor": "#f8f9fa", + "border": "1px solid #dee2e6", + "borderRadius": "6px", + "padding": "0px 0px 0px 0px", # top right bottom left + "marginTop": "-5px", + "boxSizing": "border-box", + "width": "100%", + }, + ) table_section = [ build_download_controls(table.id, row=True), @@ -1199,12 +1181,6 @@ def build_threshold_inputs( ) ) - store = Store( - id=f"{table_id}-thresholds-store", - storage_type="session", - data=default_thresholds, - ) - # Register callbacks for these metrics, pass default_thresholds for reset register_normalization_callbacks( table_id, @@ -1213,9 +1189,4 @@ def build_threshold_inputs( register_toggle=False, ) - return Div( - [ - Div(cells, id=f"{table_id}-threshold-grid", style=container_style), - store, - ] - ) + return Div([Div(cells, id=f"{table_id}-threshold-grid", style=container_style)]) diff --git a/ml_peg/app/utils/register_callbacks.py b/ml_peg/app/utils/register_callbacks.py index bff15b7e9..76280fd1c 100644 --- a/ml_peg/app/utils/register_callbacks.py +++ b/ml_peg/app/utils/register_callbacks.py @@ -19,6 +19,7 @@ dcc, no_update, ) +from dash.dash_table import DataTable from dash.exceptions import PreventUpdate import pandas as pd @@ -277,6 +278,86 @@ def register_category_table_callbacks( model_configs Optional configuration metadata for each model. """ + + @callback( + Output(table_id, "data", allow_duplicate=True), + Output(table_id, "style_data_conditional", allow_duplicate=True), + Input(f"{table_id}-raw-data-store", "data"), + State(f"{table_id}-computed-store", "data"), + State(f"{table_id}-weight-store", "data"), + State(f"{table_id}-thresholds-store", "data"), + State(f"{table_id}-normalized-toggle", "value"), + State("selected-models-store", "data"), + State("cmap-store", "data"), + State(f"{table_id}-raw-tooltip-store", "data"), + State(table_id, "columns"), + prevent_initial_call=True, + optional=True, + ) + def update_table_from_store( + stored_raw_data: list[dict] | None, + stored_computed_data: list[dict] | None, + weights: dict[str, float] | None, + thresholds: dict | None, + toggle_value: list[str] | None, + selected_models: list[str] | None, + cmap_name: str | None, + raw_tooltips: dict[str, str] | None, + current_columns: list[dict] | None, + ) -> list[dict]: + """ + Update visible table from cached data when the raw data store changes. + + Parameters + ---------- + stored_raw_data + Stored raw table data. + stored_computed_data + Stored computed table data. + weights + Stored weights for the table. + thresholds + Stored thresholds for the table. + toggle_value + Value of toggle to show normalised values. + selected_models + List of model names currently selected in the model filter. + cmap_name + Colourmap name from the cmap store. + raw_tooltips + Stored raw tooltip text for the table. + current_columns + Current table columns. + + Returns + ------- + list[dict] + Updated rows for the visible table. + """ + display_rows = get_scores( + stored_raw_data, stored_computed_data, thresholds, toggle_value + ) + scored_rows = calc_metric_scores(stored_raw_data, thresholds=thresholds) + filtered_rows = filter_rows_by_models(display_rows, selected_models) + filtered_scores = filter_rows_by_models(scored_rows, selected_models) + style = ( + get_table_style( + filtered_rows, + scored_data=filtered_scores, + cmap_name=cmap_name or "viridis_r", + ) + if filtered_rows + else [] + ) + style, tooltip_data = apply_level_of_theory_warnings( + filtered_rows, + style, + model_levels=model_levels, + metric_levels=metric_levels, + model_configs=model_configs, + ) + return filtered_rows, style + # Benchmark tables if use_thresholds: @@ -287,7 +368,7 @@ def register_category_table_callbacks( Output(table_id, "columns", allow_duplicate=True), Output(table_id, "tooltip_header", allow_duplicate=True), Output(f"{table_id}-computed-store", "data", allow_duplicate=True), - Output(f"{table_id}-raw-data-store", "data"), + Output(f"{table_id}-raw-data-store", "data", allow_duplicate=True), Input(f"{table_id}-weight-store", "data"), Input(f"{table_id}-thresholds-store", "data"), Input("app-location", "pathname"), @@ -299,6 +380,7 @@ def register_category_table_callbacks( State(f"{table_id}-raw-tooltip-store", "data"), State(table_id, "columns"), prevent_initial_call="initial_duplicate", + optional=True, ) def update_benchmark_table_scores( stored_weights: dict[str, float] | None, @@ -593,83 +675,99 @@ def update_scores_store( def register_benchmark_to_category_callback( - benchmark_table_id: str, - category_table_id: str, - benchmark_column: str, - use_threshold_store: bool = False, - model_name_map: dict[str, str] | None = None, + all_tables: dict[str, dict[str, DataTable]], category_to_title: dict[str, str] ) -> None: """ Propagate a benchmark table's Score into its category summary table column. Parameters ---------- - benchmark_table_id - ID of the benchmark test table (e.g., "OC157-table"). - category_table_id - ID of the category summary table (e.g., "Surfaces-summary-table"). - benchmark_column - Column name in the category summary table corresponding to the benchmark. - use_threshold_store - Whether the benchmark table exposes a normalization store for metrics. - model_name_map - Optional mapping of displayed benchmark MLIP names -> original model names. + all_tables + Tables for all tests, grouped by category. + category_to_title + Dictionary mapping category directory names to their display titles/table IDs. """ - _ = use_threshold_store # cached rows handle normalization - # flag kept for compatibility with existing call sites - name_map = dict(model_name_map or {}) + all_info = {} + for category, tables in all_tables.items(): + all_info[category] = {} + for test_name, benchmark_table in tables.items(): + all_info[category][test_name] = { + "benchmark_table_id": benchmark_table.id, + "benchmark_column": test_name + " Score", + "model_name_map": getattr(benchmark_table, "model_name_map", {}), + } + + outputs = [] + inputs = [] + for category, category_info in sorted(all_info.items()): + category_table_id = f"{category_to_title[category]}-summary-table" + outputs.append( + Output(f"{category_table_id}-computed-store", "data", allow_duplicate=True) + ) - @callback( - Output(f"{category_table_id}-computed-store", "data", allow_duplicate=True), - Input(f"{benchmark_table_id}-computed-store", "data"), - State(f"{category_table_id}-weight-store", "data"), - State(f"{category_table_id}-computed-store", "data"), - prevent_initial_call=True, - ) - def update_category_from_benchmark( - benchmark_computed_store: list[dict] | None, - category_weights: dict[str, float] | None, - category_computed_store: list[dict] | None, - ) -> list[dict]: + inputs.extend( + [ + State(f"{category_table_id}-weight-store", "data"), + State(f"{category_table_id}-computed-store", "data"), + ] + ) + inputs.extend( + [ + Input(f"{table_info['benchmark_table_id']}-computed-store", "data") + for _, table_info in sorted(category_info.items()) + ] + ) + + @callback(outputs, inputs, prevent_initial_call=True) + def update_category_from_benchmark(*args) -> list[list[dict]]: """ - Update cached category summary rows from a benchmark's cached scores. + Update cached category summary rows from all benchmarks' cached scores. Parameters ---------- - benchmark_computed_store - Latest scored benchmark rows emitted by the benchmark table. - category_weights - Stored weights for the category summary metrics. - category_computed_store - Cached scored rows for the category summary. + *args + States and Inputs for all category summary tables and benchmark tables. + Ordered by category. For each category, the weights, computed store, and + benchmark computed stores are listed sequentially. Returns ------- - list[dict] - Refreshed cached rows for the category summary table. + list[list[dict]] + Refreshed cached rows for each category summary table. """ - if not category_computed_store: - raise PreventUpdate - if not benchmark_computed_store: - raise PreventUpdate - category_rows = deepcopy(category_computed_store) + # Rebuild inputs for each category + iterator = iter(args) + + all_category_rows = [] + + for category, category_info in sorted(all_info.items()): + category_weights = next(iterator) + category_rows = deepcopy(next(iterator)) + + for test_name, table_info in sorted(category_info.items()): + benchmark_rows = deepcopy(next(iterator)) + name_map = table_info["model_name_map"] + + benchmark_scores = {} + for row in benchmark_rows: + display_name = row.get("MLIP") + original_name = name_map.get(display_name, display_name) + score = row.get("Score") + if display_name is None or original_name is None: + continue + benchmark_scores[original_name] = score - benchmark_scores: dict[str, float] = {} - for row in benchmark_computed_store: - display_name = row.get("MLIP") - original_name = name_map.get(display_name, display_name) - score = row.get("Score") - if display_name is None or original_name is None or score is None: - continue - benchmark_scores[original_name] = score + for row in category_rows: + mlip = row.get("MLIP") + if mlip in benchmark_scores: + row[all_info[category][test_name]["benchmark_column"]] = ( + benchmark_scores[mlip] + ) - for row in category_rows: - mlip = row.get("MLIP") - if mlip in benchmark_scores: - row[benchmark_column] = benchmark_scores[mlip] + category_rows, _ = update_score_style(category_rows, category_weights) + all_category_rows.append(category_rows) - category_rows, _ = update_score_style(category_rows, category_weights) - return category_rows + return all_category_rows def register_weight_callbacks( @@ -920,6 +1018,7 @@ def sync_threshold_input_styles( State(f"{table_id}", "columns"), State("cmap-store", "data"), prevent_initial_call=True, + optional=True, ) def toggle_normalized_display( show_normalized: list[str] | None, From 7c5fdd2c7add4103829fe5fe9243a95ad1042e0f Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Fri, 22 May 2026 15:25:57 +0100 Subject: [PATCH 02/18] Remove callback function call --- ml_peg/app/build_app.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/ml_peg/app/build_app.py b/ml_peg/app/build_app.py index 9858f74e5..7cb37d7aa 100644 --- a/ml_peg/app/build_app.py +++ b/ml_peg/app/build_app.py @@ -25,10 +25,7 @@ build_tutorial_button, register_onboarding_callbacks, ) -from ml_peg.app.utils.register_callbacks import ( - register_benchmark_to_category_callback, - register_filter_tables_callback, -) +from ml_peg.app.utils.register_callbacks import register_benchmark_to_category_callback from ml_peg.app.utils.utils import ( build_level_of_theory_warnings, get_framework_config, @@ -1279,8 +1276,6 @@ def build_full_app(full_app: Dash, category: str = "*") -> None: if not all_layouts: raise ValueError("No tests were built successfully") - register_filter_tables_callback(all_apps) - # Combine tests into categories and create category summary cat_views, cat_tables, cat_weights, framework_ids = build_category( all_layouts, all_tables, all_frameworks From 802dc18ed56cdbd13d189334606a062ce5648837 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Fri, 22 May 2026 17:15:38 +0100 Subject: [PATCH 03/18] Fix None/NaN scores --- ml_peg/analysis/utils/utils.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/ml_peg/analysis/utils/utils.py b/ml_peg/analysis/utils/utils.py index bddffba04..dc264490d 100644 --- a/ml_peg/analysis/utils/utils.py +++ b/ml_peg/analysis/utils/utils.py @@ -465,11 +465,14 @@ def calc_table_scores( # Strict mode: require all metrics to be present metrics_row["Score"] = None elif scores_list: - # Calculate weighted average of available metrics - try: - metrics_row["Score"] = np.average(scores_list, weights=weights_list) - except ZeroDivisionError: - metrics_row["Score"] = np.mean(scores_list) + if np.nan in scores_list: + metrics_row["Score"] = np.nan + else: + # Calculate weighted average of available metrics + try: + metrics_row["Score"] = np.average(scores_list, weights=weights_list) + except ZeroDivisionError: + metrics_row["Score"] = np.mean(scores_list) else: metrics_row["Score"] = None @@ -726,9 +729,9 @@ def normalize_metric( try: # Handle NaNs robustly if np.isnan([value, good_threshold, bad_threshold]).any(): - return None + return np.nan except TypeError: - return None + return np.nan if good_threshold == bad_threshold: return 1.0 if value == good_threshold else 0.0 From 5614289fe08d2f418fa8beda6aec198075d34c82 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 26 May 2026 19:30:44 +0100 Subject: [PATCH 04/18] Fix None scores --- ml_peg/analysis/utils/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ml_peg/analysis/utils/utils.py b/ml_peg/analysis/utils/utils.py index dc264490d..5cfc184ac 100644 --- a/ml_peg/analysis/utils/utils.py +++ b/ml_peg/analysis/utils/utils.py @@ -453,8 +453,8 @@ def calc_table_scores( # Weight of zero excludes the metric from scoring requirements continue - if value is not None: - scores_list.append(scores_row[key]) + if value is not None and (score := scores_row.get(key)) is not None: + scores_list.append(score) weights_list.append(weight) else: # Track if any (weighted) metric is missing From b5afb7e0807dbad14b6e476e70e55a0e39e05992 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 26 May 2026 19:31:14 +0100 Subject: [PATCH 05/18] Fix very large numbers stored --- ml_peg/app/utils/load.py | 3 +++ ml_peg/app/utils/utils.py | 24 ++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/ml_peg/app/utils/load.py b/ml_peg/app/utils/load.py index 225da0758..b62667f32 100644 --- a/ml_peg/app/utils/load.py +++ b/ml_peg/app/utils/load.py @@ -15,6 +15,7 @@ from ml_peg.app.utils.utils import ( build_level_of_theory_warnings, calculate_column_widths, + clean_table_data, clean_thresholds, clean_weights, is_numeric_column, @@ -53,6 +54,8 @@ def rebuild_table( table_json = json.load(f) data = table_json["data"] + data = clean_table_data(data) + columns = table_json["columns"] model_name_map = dict(table_json.get("model_name_map") or {}) thresholds = clean_thresholds(table_json.get("thresholds")) diff --git a/ml_peg/app/utils/utils.py b/ml_peg/app/utils/utils.py index 79540716c..8137f75d6 100644 --- a/ml_peg/app/utils/utils.py +++ b/ml_peg/app/utils/utils.py @@ -11,6 +11,7 @@ import dash.dash_table.Format as TableFormat from matplotlib import colormaps +import numpy as np import yaml from ml_peg.models import MODELS_ROOT @@ -314,6 +315,29 @@ def clean_weights(raw_weights: dict[str, float] | None) -> dict[str, float]: return weights +def clean_table_data(rows: list[dict]) -> list[dict]: + """ + Ensure data does not exceed int limits. + + Parameters + ---------- + rows + List of table rows to clean. + + Returns + ------- + list[dict] + Cleaned table rows with values larger than int64 limits set to NaN. + """ + for row in rows: + for key, value in row.items(): + if isinstance(value, int | float) and ( + value > np.iinfo(np.int64).max or value < np.iinfo(np.int64).min + ): + row[key] = np.nan + return rows + + def filter_rows_by_models( rows: list[dict] | None, selected_models: Sequence[str] | None, From 3e44b0a0533ed2edb6c802ccab691f6f862226ca Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 26 May 2026 19:31:43 +0100 Subject: [PATCH 06/18] Make callbacks optional --- ml_peg/app/utils/register_callbacks.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ml_peg/app/utils/register_callbacks.py b/ml_peg/app/utils/register_callbacks.py index 76280fd1c..45b11d90c 100644 --- a/ml_peg/app/utils/register_callbacks.py +++ b/ml_peg/app/utils/register_callbacks.py @@ -207,6 +207,7 @@ def update_summary_computed_store( Input("app-location", "pathname"), Input("cmap-store", "data"), prevent_initial_call="initial_duplicate", + optional=True, ) def sync_summary_table( selected_models: list[str] | None, @@ -506,6 +507,7 @@ def update_benchmark_table_scores( current_columns, thresholds, show_normalized ) tooltips = format_tooltip_headers(raw_tooltips, thresholds, show_normalized) + return ( filtered_rows, style, @@ -530,6 +532,7 @@ def update_benchmark_table_scores( State(table_id, "data"), State(f"{table_id}-computed-store", "data"), prevent_initial_call="initial_duplicate", + optional=True, ) def update_table_scores( stored_weights: dict[str, float] | None, @@ -589,6 +592,7 @@ def update_table_scores( Input("app-location", "pathname"), Input("cmap-store", "data"), prevent_initial_call="initial_duplicate", + optional=True, ) def sync_table_from_computed_store( computed_store: list[dict] | None, @@ -796,6 +800,7 @@ def register_weight_callbacks( Input(f"{table_id}-reset-button", "n_clicks"), State(f"{table_id}-weight-store", "data"), prevent_initial_call=True, + optional=True, ) def store_input_value( input_weight: float | None, @@ -839,6 +844,7 @@ def store_input_value( Input(f"{table_id}-weight-store", "data"), Input("app-location", "pathname"), prevent_initial_call="initial_duplicate", + optional=True, ) def sync_inputs(stored_weights: dict[str, float], _pathname: str) -> float: """ @@ -893,6 +899,7 @@ def register_normalization_callbacks( Input(f"{table_id}-reset-thresholds-button", "n_clicks"), State(f"{table_id}-thresholds-store", "data"), prevent_initial_call=True, + optional=True, ) def store_threshold_values( good_val, bad_val, n_clicks, stored_thresholds, metric=metric @@ -976,6 +983,7 @@ def store_threshold_values( *threshold_style_outputs, Input("cmap-store", "data"), prevent_initial_call=False, + optional=True, ) def sync_threshold_input_styles( cmap_name: str | None, @@ -1061,6 +1069,7 @@ def toggle_normalized_display( Output(f"{table_id}-{metric}-bad-threshold", "value"), Input(f"{table_id}-thresholds-store", "data"), prevent_initial_call=True, + optional=True, ) def sync_threshold_inputs(thresholds, metric=metric): """Sync threshold input values with stored thresholds.""" @@ -1112,6 +1121,7 @@ def register_image_download_callbacks() -> None: State({"type": "image-download-format", "index": MATCH}, "value"), State({"type": "image-download-target", "index": MATCH}, "data"), prevent_initial_call=True, + optional=True, ) def _download_image(n_clicks, fmt, uris): """ @@ -1169,6 +1179,7 @@ def register_download_callbacks(table_id: str) -> None: State(table_id, "data"), State(table_id, "columns"), prevent_initial_call=True, + optional=True, ) def download_table( n_clicks: int, @@ -1243,4 +1254,5 @@ def download_table( Output(f"{table_id}-download", "data", allow_duplicate=True), Input(f"{table_id}-download-request", "data"), prevent_initial_call=True, + optional=True, ) From d5133baf83211da56eabebddbcf6382edfdbd51a Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 26 May 2026 20:42:18 +0100 Subject: [PATCH 07/18] Short-circuit summary update --- ml_peg/app/utils/register_callbacks.py | 43 +++++++++++++++++--------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/ml_peg/app/utils/register_callbacks.py b/ml_peg/app/utils/register_callbacks.py index 45b11d90c..813597a59 100644 --- a/ml_peg/app/utils/register_callbacks.py +++ b/ml_peg/app/utils/register_callbacks.py @@ -703,7 +703,10 @@ def register_benchmark_to_category_callback( outputs = [] inputs = [] - for category, category_info in sorted(all_info.items()): + + category_order = sorted(all_info) + for category in category_order: + category_info = all_info[category] category_table_id = f"{category_to_title[category]}-summary-table" outputs.append( Output(f"{category_table_id}-computed-store", "data", allow_duplicate=True) @@ -739,20 +742,23 @@ def update_category_from_benchmark(*args) -> list[list[dict]]: list[list[dict]] Refreshed cached rows for each category summary table. """ - # Rebuild inputs for each category + trigger_id = ctx.triggered_id iterator = iter(args) + all_category_rows = [no_update for _ in category_order] - all_category_rows = [] - - for category, category_info in sorted(all_info.items()): + for category_index, category in enumerate(category_order): + category_info = all_info[category] category_weights = next(iterator) - category_rows = deepcopy(next(iterator)) + category_rows_source = next(iterator) - for test_name, table_info in sorted(category_info.items()): - benchmark_rows = deepcopy(next(iterator)) - name_map = table_info["model_name_map"] + for _test_name, table_info in sorted(category_info.items()): + benchmark_rows = next(iterator) + if f"{table_info['benchmark_table_id']}-computed-store" != trigger_id: + continue + name_map = table_info["model_name_map"] benchmark_scores = {} + for row in benchmark_rows: display_name = row.get("MLIP") original_name = name_map.get(display_name, display_name) @@ -761,15 +767,24 @@ def update_category_from_benchmark(*args) -> list[list[dict]]: continue benchmark_scores[original_name] = score + category_rows = deepcopy(category_rows_source) + benchmark_column = table_info["benchmark_column"] + rows_updated = False + for row in category_rows: mlip = row.get("MLIP") if mlip in benchmark_scores: - row[all_info[category][test_name]["benchmark_column"]] = ( - benchmark_scores[mlip] - ) + new_score = benchmark_scores[mlip] + if row.get(benchmark_column) != new_score: + row[benchmark_column] = new_score + rows_updated = True + + if not rows_updated: + break - category_rows, _ = update_score_style(category_rows, category_weights) - all_category_rows.append(category_rows) + category_rows, _ = update_score_style(category_rows, category_weights) + all_category_rows[category_index] = category_rows + break return all_category_rows From d28fa70daa7a04581a50f65502ef617136f2dbf5 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 26 May 2026 21:03:34 +0100 Subject: [PATCH 08/18] Revert "Short-circuit summary update" This reverts commit d5133baf83211da56eabebddbcf6382edfdbd51a. --- ml_peg/app/utils/register_callbacks.py | 43 +++++++++----------------- 1 file changed, 14 insertions(+), 29 deletions(-) diff --git a/ml_peg/app/utils/register_callbacks.py b/ml_peg/app/utils/register_callbacks.py index 813597a59..45b11d90c 100644 --- a/ml_peg/app/utils/register_callbacks.py +++ b/ml_peg/app/utils/register_callbacks.py @@ -703,10 +703,7 @@ def register_benchmark_to_category_callback( outputs = [] inputs = [] - - category_order = sorted(all_info) - for category in category_order: - category_info = all_info[category] + for category, category_info in sorted(all_info.items()): category_table_id = f"{category_to_title[category]}-summary-table" outputs.append( Output(f"{category_table_id}-computed-store", "data", allow_duplicate=True) @@ -742,23 +739,20 @@ def update_category_from_benchmark(*args) -> list[list[dict]]: list[list[dict]] Refreshed cached rows for each category summary table. """ - trigger_id = ctx.triggered_id + # Rebuild inputs for each category iterator = iter(args) - all_category_rows = [no_update for _ in category_order] - for category_index, category in enumerate(category_order): - category_info = all_info[category] - category_weights = next(iterator) - category_rows_source = next(iterator) + all_category_rows = [] - for _test_name, table_info in sorted(category_info.items()): - benchmark_rows = next(iterator) - if f"{table_info['benchmark_table_id']}-computed-store" != trigger_id: - continue + for category, category_info in sorted(all_info.items()): + category_weights = next(iterator) + category_rows = deepcopy(next(iterator)) + for test_name, table_info in sorted(category_info.items()): + benchmark_rows = deepcopy(next(iterator)) name_map = table_info["model_name_map"] - benchmark_scores = {} + benchmark_scores = {} for row in benchmark_rows: display_name = row.get("MLIP") original_name = name_map.get(display_name, display_name) @@ -767,24 +761,15 @@ def update_category_from_benchmark(*args) -> list[list[dict]]: continue benchmark_scores[original_name] = score - category_rows = deepcopy(category_rows_source) - benchmark_column = table_info["benchmark_column"] - rows_updated = False - for row in category_rows: mlip = row.get("MLIP") if mlip in benchmark_scores: - new_score = benchmark_scores[mlip] - if row.get(benchmark_column) != new_score: - row[benchmark_column] = new_score - rows_updated = True - - if not rows_updated: - break + row[all_info[category][test_name]["benchmark_column"]] = ( + benchmark_scores[mlip] + ) - category_rows, _ = update_score_style(category_rows, category_weights) - all_category_rows[category_index] = category_rows - break + category_rows, _ = update_score_style(category_rows, category_weights) + all_category_rows.append(category_rows) return all_category_rows From bee47c4d224163999f0628220a21a06c4cf89f94 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 26 May 2026 21:33:04 +0100 Subject: [PATCH 09/18] Improve efficiency of updates --- ml_peg/app/utils/register_callbacks.py | 31 +++++++++++++------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/ml_peg/app/utils/register_callbacks.py b/ml_peg/app/utils/register_callbacks.py index 45b11d90c..eefce2715 100644 --- a/ml_peg/app/utils/register_callbacks.py +++ b/ml_peg/app/utils/register_callbacks.py @@ -744,32 +744,31 @@ def update_category_from_benchmark(*args) -> list[list[dict]]: all_category_rows = [] - for category, category_info in sorted(all_info.items()): + for _category, category_info in sorted(all_info.items()): category_weights = next(iterator) - category_rows = deepcopy(next(iterator)) + current_rows = next(iterator) + new_rows = {row["MLIP"]: {"MLIP": row["MLIP"]} for row in current_rows} - for test_name, table_info in sorted(category_info.items()): - benchmark_rows = deepcopy(next(iterator)) + for _test_name, table_info in sorted(category_info.items()): + benchmark_rows = next(iterator) name_map = table_info["model_name_map"] - benchmark_scores = {} + benchmark_column = table_info["benchmark_column"] for row in benchmark_rows: display_name = row.get("MLIP") original_name = name_map.get(display_name, display_name) - score = row.get("Score") - if display_name is None or original_name is None: + if original_name is None: continue - benchmark_scores[original_name] = score - for row in category_rows: - mlip = row.get("MLIP") - if mlip in benchmark_scores: - row[all_info[category][test_name]["benchmark_column"]] = ( - benchmark_scores[mlip] - ) + if original_name in new_rows: + new_rows[original_name][benchmark_column] = row.get("Score") - category_rows, _ = update_score_style(category_rows, category_weights) - all_category_rows.append(category_rows) + new_rows = list(new_rows.values()) + new_rows, _ = update_score_style(new_rows, category_weights) + if new_rows == current_rows: + all_category_rows.append(no_update) + else: + all_category_rows.append(new_rows) return all_category_rows From 30eab2f200210715b7cff71a6e1742213c0b99a2 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 27 May 2026 01:05:18 +0100 Subject: [PATCH 10/18] Simplify score calculations --- ml_peg/analysis/utils/utils.py | 105 ++++++++++++++++++++------------- 1 file changed, 64 insertions(+), 41 deletions(-) diff --git a/ml_peg/analysis/utils/utils.py b/ml_peg/analysis/utils/utils.py index 5cfc184ac..87202116e 100644 --- a/ml_peg/analysis/utils/utils.py +++ b/ml_peg/analysis/utils/utils.py @@ -386,17 +386,31 @@ def calc_metric_scores( normalizer = normalizer if normalizer is not None else normalize_metric cleaned_thresholds = clean_thresholds(thresholds) if thresholds else None - metrics_scores = [row.copy() for row in metrics_data] - for row in metrics_scores: - for key, value in row.items(): - # Value may be ``None`` if missing for a benchmark - if key not in {"MLIP", "Score", "id"} and value is not None: - if cleaned_thresholds is None or key not in cleaned_thresholds: - row[key] = value - continue - - entry = cleaned_thresholds[key] - row[key] = normalizer(value, entry["good"], entry["bad"]) + if cleaned_thresholds is None: + return metrics_data + + metric_columns = [ + key for key in metrics_data[0] if key not in {"MLIP", "Score", "id"} + ] + threshold_lookup = { + key: (entry["good"], entry["bad"]) for key, entry in cleaned_thresholds.items() + } + + metrics_scores = [] + for row in metrics_data: + new_row = row.copy() + + for key in metric_columns: + if (value := row.get(key)) is None: + continue + + if (thresholds_entry := threshold_lookup.get(key)) is None: + continue + + good, bad = thresholds_entry + new_row[key] = normalizer(value, good, bad) + + metrics_scores.append(new_row) return metrics_scores @@ -407,7 +421,8 @@ def calc_table_scores( thresholds: Thresholds | None = None, normalizer: Callable[[float, float, float], float] | None = None, require_all_metrics: bool = True, -) -> list[MetricRow]: + return_scores: bool = False, +) -> list[MetricRow] | tuple[list[MetricRow], list[MetricRow]]: """ Calculate (normalised) score for each model and add to table data. @@ -429,53 +444,62 @@ def calc_table_scores( If True, score is set to None unless all metrics are present (not None). If False, score is calculated from available metrics only. Default is True. + return_scores + If True, also return the normalised metric rows used to calculate scores. + Default is False. Returns ------- - list[MetricRow] - Rows of data with combined score for each model added. + list[MetricRow] | tuple[list[MetricRow], list[MetricRow]] + Rows of data with combined score for each model added. If `return_scores` is + `True`, the normalised metric rows are also returned. """ weights = weights if weights else {} metrics_scores = calc_metric_scores(metrics_data, thresholds, normalizer) + metric_columns = [ + key for key in metrics_data[0] if key not in {"MLIP", "Score", "id"} + ] + metric_weights = {key: weights.get(key, 1.0) for key in metric_columns} + for metrics_row, scores_row in zip(metrics_data, metrics_scores, strict=True): - scores_list = [] - weights_list = [] + weighted_sum = 0.0 + weight_sum = 0.0 + all_metrics_present = True + contains_nan = False - for key, value in metrics_row.items(): - if key in {"MLIP", "Score", "id"}: + for key in metric_columns: + if (weight := metric_weights[key]) == 0: continue - weight = weights.get(key, 1.0) - if weight == 0: - # Weight of zero excludes the metric from scoring requirements - continue + value = metrics_row.get(key) + score = scores_row.get(key) - if value is not None and (score := scores_row.get(key)) is not None: - scores_list.append(score) - weights_list.append(weight) - else: - # Track if any (weighted) metric is missing + if value is None or score is None: all_metrics_present = False + continue - # Calculate score only if conditions are met - if require_all_metrics and not all_metrics_present: - # Strict mode: require all metrics to be present + if isinstance(score, float) and np.isnan(score): + contains_nan = True + break + + weighted_sum += score * weight + weight_sum += weight + + if contains_nan: + metrics_row["Score"] = np.nan + elif require_all_metrics and not all_metrics_present: metrics_row["Score"] = None - elif scores_list: - if np.nan in scores_list: - metrics_row["Score"] = np.nan - else: - # Calculate weighted average of available metrics - try: - metrics_row["Score"] = np.average(scores_list, weights=weights_list) - except ZeroDivisionError: - metrics_row["Score"] = np.mean(scores_list) + elif weight_sum > 0: + metrics_row["Score"] = weighted_sum / weight_sum else: metrics_row["Score"] = None + if return_scores: + return metrics_data, metrics_scores + return metrics_data @@ -690,8 +714,7 @@ def update_score_style( Updated table rows and style data. """ weights = clean_weights(weights) - data = calc_table_scores(data, weights, thresholds) - scored_data = calc_metric_scores(data, thresholds) + data, scored_data = calc_table_scores(data, weights, thresholds, return_scores=True) style = get_table_style(data, scored_data=scored_data) return data, style From 18d697210cd285529a111e0412be0f215b60fbd6 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 27 May 2026 01:54:41 +0100 Subject: [PATCH 11/18] Fix setting input boxes on page changes --- ml_peg/app/utils/register_callbacks.py | 27 +++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/ml_peg/app/utils/register_callbacks.py b/ml_peg/app/utils/register_callbacks.py index eefce2715..900054c09 100644 --- a/ml_peg/app/utils/register_callbacks.py +++ b/ml_peg/app/utils/register_callbacks.py @@ -814,7 +814,7 @@ def store_input_value( input_weight Weight value from input box. n_clicks - Number of clicks. Variable unused, but Input is required to reset weights. + Number of clicks. stored_weights Stored weights dictionary. @@ -829,7 +829,7 @@ def store_input_value( if input_weight is None: raise PreventUpdate stored_weights[column] = input_weight - elif trigger_id == f"{table_id}-reset-button": + elif trigger_id == f"{table_id}-reset-button" and n_clicks > 0: stored_weights.update( (key, default_weights.get(key, 1.0)) for key in stored_weights ) @@ -909,6 +909,8 @@ def store_threshold_values( # Reset to defaults is specified via reset button if trigger_id == f"{table_id}-reset-thresholds-button": + if not n_clicks: + raise PreventUpdate if cleaned_defaults: return deepcopy(cleaned_defaults) return cleaned_store @@ -1067,11 +1069,26 @@ def toggle_normalized_display( Output(f"{table_id}-{metric}-good-threshold", "value"), Output(f"{table_id}-{metric}-bad-threshold", "value"), Input(f"{table_id}-thresholds-store", "data"), - prevent_initial_call=True, + Input("app-location", "pathname"), + # prevent_initial_call=True, optional=True, ) - def sync_threshold_inputs(thresholds, metric=metric): - """Sync threshold input values with stored thresholds.""" + def sync_threshold_inputs( + thresholds: Thresholds | None, _pathname: str, metric: str = metric + ) -> tuple[float | None, float | None]: + """ + Sync threshold input values with stored thresholds. + + Parameters + ---------- + thresholds + Stored threshold values. + _pathname + Current pathname. Variable unused, but required as input to trigger on + path change. + metric + Metric name corresponding to the threshold inputs. + """ cleaned_thresholds = clean_thresholds(thresholds) if cleaned_thresholds and metric in cleaned_thresholds: entry = cleaned_thresholds[metric] From 9632cc054cf6bc7282b9c94c2892d331ceda734d Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 27 May 2026 15:10:18 +0100 Subject: [PATCH 12/18] Allow missing metrics data --- ml_peg/analysis/utils/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ml_peg/analysis/utils/utils.py b/ml_peg/analysis/utils/utils.py index 87202116e..f1c5a2209 100644 --- a/ml_peg/analysis/utils/utils.py +++ b/ml_peg/analysis/utils/utils.py @@ -386,7 +386,7 @@ def calc_metric_scores( normalizer = normalizer if normalizer is not None else normalize_metric cleaned_thresholds = clean_thresholds(thresholds) if thresholds else None - if cleaned_thresholds is None: + if cleaned_thresholds is None or not metrics_data: return metrics_data metric_columns = [ @@ -458,6 +458,9 @@ def calc_table_scores( metrics_scores = calc_metric_scores(metrics_data, thresholds, normalizer) + if not metrics_data: + return metrics_data if not return_scores else (metrics_data, metrics_scores) + metric_columns = [ key for key in metrics_data[0] if key not in {"MLIP", "Score", "id"} ] From f8b69491c80f9236ecedc2214194ce1192ddb3c0 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 27 May 2026 15:34:05 +0100 Subject: [PATCH 13/18] Remove duplicate stores --- ml_peg/app/utils/build_components.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/ml_peg/app/utils/build_components.py b/ml_peg/app/utils/build_components.py index 49f604d1b..d5dba5742 100644 --- a/ml_peg/app/utils/build_components.py +++ b/ml_peg/app/utils/build_components.py @@ -851,28 +851,11 @@ def build_test_layout( ] ) - # dcc.Store renders no HTML, so its position here doesn't affect layout. - # Placed before the table so the table and controls can share one wrapper below. - layout_contents.append( - Store( - id=f"{table.id}-computed-store", - storage_type="session", - data=table.data, - ) - ) - reserved = {"MLIP", "Score", "id"} metric_columns = [ col["id"] for col in table.columns if col.get("id") not in reserved ] - layout_contents.append( - Store( - id=f"{table.id}-raw-data-store", - storage_type="session", - data=table.data, - ) - ) layout_contents.append( Store( id=f"{table.id}-raw-tooltip-store", From d4022b44160cbc379cc7cb44f345576cb02cb353 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 27 May 2026 16:38:12 +0100 Subject: [PATCH 14/18] Remove table persistence --- ml_peg/app/build_app.py | 3 --- ml_peg/app/utils/load.py | 3 --- 2 files changed, 6 deletions(-) diff --git a/ml_peg/app/build_app.py b/ml_peg/app/build_app.py index 7cb37d7aa..b26f42b5c 100644 --- a/ml_peg/app/build_app.py +++ b/ml_peg/app/build_app.py @@ -815,9 +815,6 @@ def build_summary_table( tooltip_data=tooltip_rows, tooltip_delay=100, tooltip_duration=None, - persistence=True, - persistence_type="session", - persisted_props=["data"], tooltip_header=tooltip_header, editable=False, fill_width=False, diff --git a/ml_peg/app/utils/load.py b/ml_peg/app/utils/load.py index b62667f32..c6a775ba4 100644 --- a/ml_peg/app/utils/load.py +++ b/ml_peg/app/utils/load.py @@ -205,9 +205,6 @@ def rebuild_table( } ], sort_action="native", - persistence=True, - persistence_type="session", - persisted_props=["data"], fill_width=False, ) From 28cb6277324e7a34afbfa1880e8de6bba933d40c Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 27 May 2026 16:49:17 +0100 Subject: [PATCH 15/18] Add patching to table updates --- ml_peg/app/utils/register_callbacks.py | 46 ++++++++++++++++---------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/ml_peg/app/utils/register_callbacks.py b/ml_peg/app/utils/register_callbacks.py index 900054c09..6aa29de5c 100644 --- a/ml_peg/app/utils/register_callbacks.py +++ b/ml_peg/app/utils/register_callbacks.py @@ -12,6 +12,7 @@ ClientsideFunction, Input, Output, + Patch, State, callback, clientside_callback, @@ -551,23 +552,15 @@ def update_table_scores( trigger_id = ctx.triggered_id - if trigger_id in ("app-location", "cmap-store"): - filtered_rows = filter_rows_by_models(source_data, selected_models) - style = ( - get_table_style(filtered_rows, cmap_name=cmap_name or "viridis_r") - if filtered_rows - else [] - ) - style, tooltip_data = apply_level_of_theory_warnings( - filtered_rows, - style, - model_levels=model_levels, - metric_levels=metric_levels, - model_configs=model_configs, - ) - return filtered_rows, style, tooltip_data, source_data + # Recompute scores only when weights changed + if trigger_id == f"{table_id}-weight-store": + scored_rows, _ = update_score_style(source_data, stored_weights) + updated_store = scored_rows + + else: + scored_rows = source_data + updated_store = no_update - scored_rows, _ = update_score_style(source_data, stored_weights) filtered_rows = filter_rows_by_models(scored_rows, selected_models) style = ( get_table_style(filtered_rows, cmap_name=cmap_name or "viridis_r") @@ -581,7 +574,26 @@ def update_table_scores( metric_levels=metric_levels, model_configs=model_configs, ) - return filtered_rows, style, tooltip_data, scored_rows + + if not table_data or len(filtered_rows) != len(table_data): + return filtered_rows, style, tooltip_data, scored_rows + + patch = Patch() + rows_changed = False + + for row_index, (old_row, new_row) in enumerate( + zip(table_data, filtered_rows, strict=True) + ): + for key, new_value in new_row.items(): + if old_row.get(key) != new_value: + patch[row_index][key] = new_value + rows_changed = True + + # No visual change + if not rows_changed: + return no_update, style, tooltip_data, updated_store + + return patch, style, tooltip_data, updated_store @callback( Output(table_id, "data", allow_duplicate=True), From 3805bcda9c5890e4e2732e8ba5bbc7ea11364495 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 27 May 2026 17:06:07 +0100 Subject: [PATCH 16/18] Remove app location triggers --- ml_peg/app/utils/register_callbacks.py | 32 +++----------------------- 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/ml_peg/app/utils/register_callbacks.py b/ml_peg/app/utils/register_callbacks.py index 6aa29de5c..232c74170 100644 --- a/ml_peg/app/utils/register_callbacks.py +++ b/ml_peg/app/utils/register_callbacks.py @@ -205,7 +205,6 @@ def update_summary_computed_store( Output("summary-table", "tooltip_data", allow_duplicate=True), Input("selected-models-store", "data"), Input("summary-table-computed-store", "data"), - Input("app-location", "pathname"), Input("cmap-store", "data"), prevent_initial_call="initial_duplicate", optional=True, @@ -213,7 +212,6 @@ def update_summary_computed_store( def sync_summary_table( selected_models: list[str] | None, computed_store: list[dict] | None, - _pathname: str, cmap_name: str | None, ) -> tuple[list[dict], list[dict], list[dict]]: """ @@ -225,9 +223,6 @@ def sync_summary_table( Models currently selected in the global model filter. computed_store Cached full summary rows for the overall summary table. - _pathname - Current pathname. Included so the visible table refreshes when the - summary page is opened. cmap_name Matplotlib colormap name from the cmap store. @@ -373,7 +368,6 @@ def update_table_from_store( Output(f"{table_id}-raw-data-store", "data", allow_duplicate=True), Input(f"{table_id}-weight-store", "data"), Input(f"{table_id}-thresholds-store", "data"), - Input("app-location", "pathname"), Input(f"{table_id}-normalized-toggle", "value"), Input("selected-models-store", "data"), Input("cmap-store", "data"), @@ -387,7 +381,6 @@ def update_table_from_store( def update_benchmark_table_scores( stored_weights: dict[str, float] | None, stored_threshold: dict | None, - _pathname: str, toggle_value: list[str] | None, selected_models: list[str] | None, cmap_name: str | None, @@ -413,8 +406,6 @@ def update_benchmark_table_scores( Stored weights dictionary for table metrics. stored_threshold Stored thresholds dictionary for table metric thresholds. - _pathname - Current URL path. Unused, required to trigger on path change. toggle_value Value of toggle to show normalised values. selected_models @@ -434,8 +425,7 @@ def update_benchmark_table_scores( # Page changes and toggle flips reuse the cached scored rows rather than # recalculating scores, we only re-score when weights/thresholds change. if ( - trigger_id - in ("app-location", f"{table_id}-normalized-toggle", "cmap-store") + trigger_id in (f"{table_id}-normalized-toggle", "cmap-store") and stored_computed_data ): display_rows = get_scores( @@ -528,7 +518,6 @@ def update_benchmark_table_scores( Output(f"{table_id}-computed-store", "data", allow_duplicate=True), Input(f"{table_id}-weight-store", "data"), Input("selected-models-store", "data"), - Input("app-location", "pathname"), Input("cmap-store", "data"), State(table_id, "data"), State(f"{table_id}-computed-store", "data"), @@ -538,7 +527,6 @@ def update_benchmark_table_scores( def update_table_scores( stored_weights: dict[str, float] | None, selected_models: list[str] | None, - _pathname: str, cmap_name: str | None, table_data: list[dict] | None, computed_store: list[dict] | None, @@ -601,7 +589,6 @@ def update_table_scores( Output(table_id, "tooltip_data", allow_duplicate=True), Input(f"{table_id}-computed-store", "data"), Input("selected-models-store", "data"), - Input("app-location", "pathname"), Input("cmap-store", "data"), prevent_initial_call="initial_duplicate", optional=True, @@ -609,7 +596,6 @@ def update_table_scores( def sync_table_from_computed_store( computed_store: list[dict] | None, selected_models: list[str] | None, - _pathname: str, cmap_name: str | None, ) -> tuple[list[dict], list[dict], list[dict]]: """ @@ -621,9 +607,6 @@ def sync_table_from_computed_store( Cached unfiltered rows for the category summary. selected_models Currently selected model names. - _pathname - Current pathname. Unused, required so the callback hydrates when the - category page is mounted. Returns ------- @@ -853,11 +836,10 @@ def store_input_value( @callback( Output(f"{input_id}-input", "value"), Input(f"{table_id}-weight-store", "data"), - Input("app-location", "pathname"), prevent_initial_call="initial_duplicate", optional=True, ) - def sync_inputs(stored_weights: dict[str, float], _pathname: str) -> float: + def sync_inputs(stored_weights: dict[str, float]) -> float: """ Sync weight values between the text input and Store. @@ -865,9 +847,6 @@ def sync_inputs(stored_weights: dict[str, float], _pathname: str) -> float: ---------- stored_weights Stored weight values for each column. - _pathname - Current pathname. Variable unused, but required as input to trigger on - path change. Returns ------- @@ -1081,12 +1060,10 @@ def toggle_normalized_display( Output(f"{table_id}-{metric}-good-threshold", "value"), Output(f"{table_id}-{metric}-bad-threshold", "value"), Input(f"{table_id}-thresholds-store", "data"), - Input("app-location", "pathname"), - # prevent_initial_call=True, optional=True, ) def sync_threshold_inputs( - thresholds: Thresholds | None, _pathname: str, metric: str = metric + thresholds: Thresholds | None, metric: str = metric ) -> tuple[float | None, float | None]: """ Sync threshold input values with stored thresholds. @@ -1095,9 +1072,6 @@ def sync_threshold_inputs( ---------- thresholds Stored threshold values. - _pathname - Current pathname. Variable unused, but required as input to trigger on - path change. metric Metric name corresponding to the threshold inputs. """ From fa9c75d3626a1ca2a09efb9afd21048427d375d2 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 27 May 2026 17:45:07 +0100 Subject: [PATCH 17/18] Add patch for category updates --- ml_peg/app/utils/register_callbacks.py | 63 ++++++++++++++++++++------ 1 file changed, 50 insertions(+), 13 deletions(-) diff --git a/ml_peg/app/utils/register_callbacks.py b/ml_peg/app/utils/register_callbacks.py index 232c74170..1efe6672f 100644 --- a/ml_peg/app/utils/register_callbacks.py +++ b/ml_peg/app/utils/register_callbacks.py @@ -736,36 +736,73 @@ def update_category_from_benchmark(*args) -> list[list[dict]]: """ # Rebuild inputs for each category iterator = iter(args) - - all_category_rows = [] + patched_outputs = [] for _category, category_info in sorted(all_info.items()): category_weights = next(iterator) current_rows = next(iterator) - new_rows = {row["MLIP"]: {"MLIP": row["MLIP"]} for row in current_rows} + + updated_rows = [] + for row in current_rows: + updated_row = row.copy() + updated_rows.append(updated_row) + + updated_by_mlip = {row["MLIP"]: row for row in updated_rows} + + benchmark_changed = False for _test_name, table_info in sorted(category_info.items()): benchmark_rows = next(iterator) - name_map = table_info["model_name_map"] + name_map = table_info["model_name_map"] benchmark_column = table_info["benchmark_column"] + for row in benchmark_rows: display_name = row.get("MLIP") original_name = name_map.get(display_name, display_name) - if original_name is None: + if original_name not in updated_by_mlip: continue - if original_name in new_rows: - new_rows[original_name][benchmark_column] = row.get("Score") + new_score = row.get("Score") + target_row = updated_by_mlip[original_name] + + if target_row.get(benchmark_column) != new_score: + target_row[benchmark_column] = new_score + benchmark_changed = True + + if not benchmark_changed: + patched_outputs.append(no_update) + continue + + # Recompute overall category scores using existing utility + rescored_rows, _ = update_score_style(updated_rows, category_weights) + + patch = Patch() + score_changed = False + + for idx, (old_row, new_row) in enumerate( + zip(current_rows, rescored_rows, strict=True) + ): + # Patch benchmark columns + for key, value in new_row.items(): + if key in {"MLIP", "Score"}: + continue + + if old_row.get(key) != value: + patch[idx][key] = value + score_changed = True + + # Patch overall score + if old_row.get("Score") != new_row.get("Score"): + patch[idx]["Score"] = new_row.get("Score") + score_changed = True - new_rows = list(new_rows.values()) - new_rows, _ = update_score_style(new_rows, category_weights) - if new_rows == current_rows: - all_category_rows.append(no_update) + if score_changed: + patched_outputs.append(patch) else: - all_category_rows.append(new_rows) + patched_outputs.append(no_update) - return all_category_rows + return patched_outputs def register_weight_callbacks( From 0e2d629579c287d0dbbd57cb709a987036f5ea1f Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Thu, 28 May 2026 15:36:49 +0100 Subject: [PATCH 18/18] Add patch for summary table updates --- ml_peg/app/utils/register_callbacks.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/ml_peg/app/utils/register_callbacks.py b/ml_peg/app/utils/register_callbacks.py index 1efe6672f..8d2c0d4db 100644 --- a/ml_peg/app/utils/register_callbacks.py +++ b/ml_peg/app/utils/register_callbacks.py @@ -658,19 +658,26 @@ def update_scores_store( """ # Only category summary tables should write to the global store if not table_id.endswith("-summary-table"): - return scores_data + raise PreventUpdate if not computed_rows: - return scores_data + raise PreventUpdate + + # Category table IDs are of form "[category]-summary-table" + category_key = table_id.removesuffix("-summary-table") + " Score" - if not scores_data: - scores_data = {} - # Update scores store. Category table IDs are of form "[category]-summary-table" - # Table headings are of the form "[category] Score" - scores_data[table_id.removesuffix("-summary-table") + " Score"] = { + new_scores = { row["MLIP"]: row["Score"] for row in computed_rows if row.get("MLIP") } - return scores_data + current_scores = (scores_data or {}).get(category_key) + + if current_scores == new_scores: + return no_update + + patch = Patch() + patch[category_key] = new_scores + + return patch def register_benchmark_to_category_callback(