From 08cecb1fd46030f88294353376727ddc73c1ed70 Mon Sep 17 00:00:00 2001 From: David Hauser Date: Tue, 10 Mar 2026 13:50:41 +0100 Subject: [PATCH 01/12] feat: add noether-init for scaffolding --- pyproject.toml | 8 + src/noether/scaffold/__init__.py | 1 + src/noether/scaffold/choices.py | 61 +++ src/noether/scaffold/cli.py | 99 ++++ src/noether/scaffold/config.py | 73 +++ src/noether/scaffold/file_copier.py | 225 ++++++++ src/noether/scaffold/generator.py | 13 + src/noether/scaffold/references/ahmedml.yaml | 8 + .../scaffold/references/drivaerml.yaml | 8 + .../scaffold/references/drivaernet.yaml | 8 + .../scaffold/references/emmi_wing.yaml | 8 + .../scaffold/references/shapenet_car.yaml | 8 + .../scaffold/template_files/__init__.py | 1 + .../template_files/callbacks/__init__.py | 3 + .../surface_volume_evaluation_metrics.py | 309 +++++++++++ .../callbacks/training_callbacks_caeml.yaml | 45 ++ .../training_callbacks_shapenet.yaml | 39 ++ .../configs/data_specs/caeml.yaml | 10 + .../configs/data_specs/emmi_wing.yaml | 12 + .../configs/data_specs/shapenet_car.yaml | 13 + .../caeml_dataset_normalizers.yaml | 31 ++ .../shapenet_dataset_normalizers.yaml | 22 + .../wing_dataset_normalizers.yaml | 38 ++ .../dataset_statistics/ahmedml_stats.yaml | 12 + .../dataset_statistics/drivaerml_stats.yaml | 12 + .../drivaernet++_stats.yaml | 24 + .../shapenet_car_stats.yaml | 8 + .../dataset_statistics/wing_stats.yaml | 21 + .../configs/datasets/caeml_dataset.yaml | 31 ++ .../configs/datasets/shapenet_dataset.yaml | 25 + .../configs/experiment/caeml/ab_upt.yaml | 28 + .../configs/experiment/caeml/transformer.yaml | 16 + .../configs/experiment/caeml/transolver.yaml | 16 + .../configs/experiment/caeml/upt.yaml | 28 + .../configs/experiment/emmi_wing/ab_upt.yaml | 36 ++ .../experiment/emmi_wing/transformer.yaml | 16 + .../experiment/emmi_wing/transolver.yaml | 16 + .../configs/experiment/emmi_wing/upt.yaml | 28 + .../configs/experiment/shapenet/ab_upt.yaml | 16 + 
.../experiment/shapenet/transformer.yaml | 6 + .../experiment/shapenet/transolver.yaml | 6 + .../configs/experiment/shapenet/upt.yaml | 16 + .../template_files/configs/model/ab_upt.yaml | 33 ++ .../configs/model/transformer.yaml | 15 + .../configs/model/transolver.yaml | 16 + .../template_files/configs/model/upt.yaml | 30 ++ .../configs/optimizer/adamw.yaml | 9 + .../configs/optimizer/lion.yaml | 9 + .../configs/pipeline/caeml_pipeline.yaml | 8 + .../configs/pipeline/shapenet_pipeline.yaml | 8 + .../configs/tracker/disabled.yaml | 1 + .../configs/tracker/tensorboard.yaml | 2 + .../configs/tracker/trackio.yaml | 2 + .../template_files/configs/tracker/wandb.yaml | 3 + .../template_files/configs/train_ahmedml.yaml | 26 + .../configs/train_drivaerml.yaml | 26 + .../configs/train_drivaernet.yaml | 26 + .../configs/train_emmi_wing.yaml | 26 + .../configs/train_shapenet_car.yaml | 25 + .../configs/trainer/caeml_trainer.yaml | 23 + .../configs/trainer/shapenet_trainer.yaml | 18 + .../scaffold/template_files/model/ab_upt.py | 77 +++ .../scaffold/template_files/model/base.py | 207 ++++++++ .../template_files/model/transformer.py | 89 ++++ .../template_files/model/transolver.py | 81 +++ .../scaffold/template_files/model/upt.py | 113 ++++ .../template_files/pipeline/__init__.py | 3 + .../pipeline/collators/__init__.py | 3 + .../collators/sparse_tensor_offset.py | 35 ++ .../pipeline/multistage_pipelines/__init__.py | 3 + .../multistage_pipelines/aero_multistage.py | 481 ++++++++++++++++++ .../pipeline/sample_processors/__init__.py | 3 + .../anchor_point_sampling.py | 88 ++++ .../template_files/schemas/__init__.py | 1 + .../schemas/callbacks/__init__.py | 7 + .../schemas/callbacks/callback_config.py | 37 ++ .../template_files/schemas/config_schema.py | 19 + .../schemas/datasets/__init__.py | 3 + .../schemas/datasets/aero_dataset_config.py | 10 + .../schemas/models/ab_upt_config.py | 11 + .../schemas/models/base_config.py | 24 + .../schemas/models/transformer_config.py | 11 + 
.../schemas/models/transolver_config.py | 13 + .../schemas/models/upt_config.py | 11 + .../schemas/pipelines/__init__.py | 1 + .../schemas/pipelines/aero_pipeline_config.py | 38 ++ .../schemas/trainers/__init__.py | 3 + .../automotive_aerodynamics_trainer_config.py | 33 ++ .../template_files/trainers/__init__.py | 3 + .../trainers/automotive_aerodynamics_cfd.py | 85 ++++ tests/test_scaffold.py | 127 +++++ 91 files changed, 3330 insertions(+) create mode 100644 src/noether/scaffold/__init__.py create mode 100644 src/noether/scaffold/choices.py create mode 100644 src/noether/scaffold/cli.py create mode 100644 src/noether/scaffold/config.py create mode 100644 src/noether/scaffold/file_copier.py create mode 100644 src/noether/scaffold/generator.py create mode 100644 src/noether/scaffold/references/ahmedml.yaml create mode 100644 src/noether/scaffold/references/drivaerml.yaml create mode 100644 src/noether/scaffold/references/drivaernet.yaml create mode 100644 src/noether/scaffold/references/emmi_wing.yaml create mode 100644 src/noether/scaffold/references/shapenet_car.yaml create mode 100644 src/noether/scaffold/template_files/__init__.py create mode 100644 src/noether/scaffold/template_files/callbacks/__init__.py create mode 100644 src/noether/scaffold/template_files/callbacks/surface_volume_evaluation_metrics.py create mode 100644 src/noether/scaffold/template_files/configs/callbacks/training_callbacks_caeml.yaml create mode 100644 src/noether/scaffold/template_files/configs/callbacks/training_callbacks_shapenet.yaml create mode 100644 src/noether/scaffold/template_files/configs/data_specs/caeml.yaml create mode 100644 src/noether/scaffold/template_files/configs/data_specs/emmi_wing.yaml create mode 100644 src/noether/scaffold/template_files/configs/data_specs/shapenet_car.yaml create mode 100644 src/noether/scaffold/template_files/configs/dataset_normalizers/caeml_dataset_normalizers.yaml create mode 100644 
src/noether/scaffold/template_files/configs/dataset_normalizers/shapenet_dataset_normalizers.yaml create mode 100644 src/noether/scaffold/template_files/configs/dataset_normalizers/wing_dataset_normalizers.yaml create mode 100644 src/noether/scaffold/template_files/configs/dataset_statistics/ahmedml_stats.yaml create mode 100644 src/noether/scaffold/template_files/configs/dataset_statistics/drivaerml_stats.yaml create mode 100644 src/noether/scaffold/template_files/configs/dataset_statistics/drivaernet++_stats.yaml create mode 100644 src/noether/scaffold/template_files/configs/dataset_statistics/shapenet_car_stats.yaml create mode 100644 src/noether/scaffold/template_files/configs/dataset_statistics/wing_stats.yaml create mode 100644 src/noether/scaffold/template_files/configs/datasets/caeml_dataset.yaml create mode 100644 src/noether/scaffold/template_files/configs/datasets/shapenet_dataset.yaml create mode 100644 src/noether/scaffold/template_files/configs/experiment/caeml/ab_upt.yaml create mode 100644 src/noether/scaffold/template_files/configs/experiment/caeml/transformer.yaml create mode 100644 src/noether/scaffold/template_files/configs/experiment/caeml/transolver.yaml create mode 100644 src/noether/scaffold/template_files/configs/experiment/caeml/upt.yaml create mode 100644 src/noether/scaffold/template_files/configs/experiment/emmi_wing/ab_upt.yaml create mode 100644 src/noether/scaffold/template_files/configs/experiment/emmi_wing/transformer.yaml create mode 100644 src/noether/scaffold/template_files/configs/experiment/emmi_wing/transolver.yaml create mode 100644 src/noether/scaffold/template_files/configs/experiment/emmi_wing/upt.yaml create mode 100644 src/noether/scaffold/template_files/configs/experiment/shapenet/ab_upt.yaml create mode 100644 src/noether/scaffold/template_files/configs/experiment/shapenet/transformer.yaml create mode 100644 src/noether/scaffold/template_files/configs/experiment/shapenet/transolver.yaml create mode 100644 
src/noether/scaffold/template_files/configs/experiment/shapenet/upt.yaml create mode 100644 src/noether/scaffold/template_files/configs/model/ab_upt.yaml create mode 100644 src/noether/scaffold/template_files/configs/model/transformer.yaml create mode 100644 src/noether/scaffold/template_files/configs/model/transolver.yaml create mode 100644 src/noether/scaffold/template_files/configs/model/upt.yaml create mode 100644 src/noether/scaffold/template_files/configs/optimizer/adamw.yaml create mode 100644 src/noether/scaffold/template_files/configs/optimizer/lion.yaml create mode 100644 src/noether/scaffold/template_files/configs/pipeline/caeml_pipeline.yaml create mode 100644 src/noether/scaffold/template_files/configs/pipeline/shapenet_pipeline.yaml create mode 100644 src/noether/scaffold/template_files/configs/tracker/disabled.yaml create mode 100644 src/noether/scaffold/template_files/configs/tracker/tensorboard.yaml create mode 100644 src/noether/scaffold/template_files/configs/tracker/trackio.yaml create mode 100644 src/noether/scaffold/template_files/configs/tracker/wandb.yaml create mode 100644 src/noether/scaffold/template_files/configs/train_ahmedml.yaml create mode 100644 src/noether/scaffold/template_files/configs/train_drivaerml.yaml create mode 100644 src/noether/scaffold/template_files/configs/train_drivaernet.yaml create mode 100644 src/noether/scaffold/template_files/configs/train_emmi_wing.yaml create mode 100644 src/noether/scaffold/template_files/configs/train_shapenet_car.yaml create mode 100644 src/noether/scaffold/template_files/configs/trainer/caeml_trainer.yaml create mode 100644 src/noether/scaffold/template_files/configs/trainer/shapenet_trainer.yaml create mode 100644 src/noether/scaffold/template_files/model/ab_upt.py create mode 100644 src/noether/scaffold/template_files/model/base.py create mode 100644 src/noether/scaffold/template_files/model/transformer.py create mode 100644 src/noether/scaffold/template_files/model/transolver.py create 
mode 100644 src/noether/scaffold/template_files/model/upt.py create mode 100644 src/noether/scaffold/template_files/pipeline/__init__.py create mode 100644 src/noether/scaffold/template_files/pipeline/collators/__init__.py create mode 100644 src/noether/scaffold/template_files/pipeline/collators/sparse_tensor_offset.py create mode 100644 src/noether/scaffold/template_files/pipeline/multistage_pipelines/__init__.py create mode 100644 src/noether/scaffold/template_files/pipeline/multistage_pipelines/aero_multistage.py create mode 100644 src/noether/scaffold/template_files/pipeline/sample_processors/__init__.py create mode 100644 src/noether/scaffold/template_files/pipeline/sample_processors/anchor_point_sampling.py create mode 100644 src/noether/scaffold/template_files/schemas/__init__.py create mode 100644 src/noether/scaffold/template_files/schemas/callbacks/__init__.py create mode 100644 src/noether/scaffold/template_files/schemas/callbacks/callback_config.py create mode 100644 src/noether/scaffold/template_files/schemas/config_schema.py create mode 100644 src/noether/scaffold/template_files/schemas/datasets/__init__.py create mode 100644 src/noether/scaffold/template_files/schemas/datasets/aero_dataset_config.py create mode 100644 src/noether/scaffold/template_files/schemas/models/ab_upt_config.py create mode 100644 src/noether/scaffold/template_files/schemas/models/base_config.py create mode 100644 src/noether/scaffold/template_files/schemas/models/transformer_config.py create mode 100644 src/noether/scaffold/template_files/schemas/models/transolver_config.py create mode 100644 src/noether/scaffold/template_files/schemas/models/upt_config.py create mode 100644 src/noether/scaffold/template_files/schemas/pipelines/__init__.py create mode 100644 src/noether/scaffold/template_files/schemas/pipelines/aero_pipeline_config.py create mode 100644 src/noether/scaffold/template_files/schemas/trainers/__init__.py create mode 100644 
src/noether/scaffold/template_files/schemas/trainers/automotive_aerodynamics_trainer_config.py create mode 100644 src/noether/scaffold/template_files/trainers/__init__.py create mode 100644 src/noether/scaffold/template_files/trainers/automotive_aerodynamics_cfd.py create mode 100644 tests/test_scaffold.py diff --git a/pyproject.toml b/pyproject.toml index f039c787..ad463ad2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,12 +43,16 @@ Docs = "https://noether-docs.emmi.ai/" [tool.setuptools_scm] write_to = "src/noether/_version.py" +[tool.setuptools.package-data] +"noether.scaffold" = ["references/*.yaml", "template_files/**/*"] + [project.scripts] noether-train = "noether.training.cli.main_train:main" noether-train-submit-job = "noether.training.cli.submit_job:main" noether-eval = "noether.inference.cli.main_inference:main" noether-data = "noether.io.cli.cli:app" noether-dataset-stats = "noether.data.tools.calculate_statistics:main" +noether-init = "noether.scaffold.cli:app" # --- Centralized Development & Tooling Dependencies --- # These are dependencies for developing the *entire* workspace. @@ -130,6 +134,10 @@ module = [ # "rtree.*" ] +[[tool.mypy.overrides]] +module = ["noether.scaffold.template_files.*"] +ignore_errors = true + [tool.pytest.ini_options] testpaths = ["tests"] pythonpath = ["src"] diff --git a/src/noether/scaffold/__init__.py b/src/noether/scaffold/__init__.py new file mode 100644 index 00000000..bbf17085 --- /dev/null +++ b/src/noether/scaffold/__init__.py @@ -0,0 +1 @@ +# Copyright © 2025 Emmi AI GmbH. All rights reserved. diff --git a/src/noether/scaffold/choices.py b/src/noether/scaffold/choices.py new file mode 100644 index 00000000..c2d2ad6a --- /dev/null +++ b/src/noether/scaffold/choices.py @@ -0,0 +1,61 @@ +# Copyright © 2025 Emmi AI GmbH. All rights reserved. 
+ +from __future__ import annotations + +from enum import StrEnum + +_MODEL_CLASS_NAMES: dict[str, str] = { + "transformer": "Transformer", + "upt": "UPT", + "ab_upt": "ABUPT", + "transolver": "Transolver", +} + + +class ModelChoice(StrEnum): + TRANSFORMER = "transformer" + UPT = "upt" + AB_UPT = "ab_upt" + TRANSOLVER = "transolver" + + @property + def class_name(self) -> str: + return _MODEL_CLASS_NAMES[self.value] + + @property + def module_name(self) -> str: + return self.value + + @property + def schema_module(self) -> str: + return f"{self.value}_config" + + @property + def config_class_name(self) -> str: + return f"{self.class_name}Config" + + +class DatasetChoice(StrEnum): + SHAPENET_CAR = "shapenet_car" + DRIVAERNET = "drivaernet" + DRIVAERML = "drivaerml" + AHMEDML = "ahmedml" + EMMI_WING = "emmi_wing" + + +class OptimizerChoice(StrEnum): + ADAMW = "adamw" + LION = "lion" + + +class TrackerChoice(StrEnum): + WANDB = "wandb" + TRACKIO = "trackio" + TENSORBOARD = "tensorboard" + DISABLED = "disabled" + + +class HardwareChoice(StrEnum): + GPU = "gpu" + MPS = "mps" + CPU = "cpu" diff --git a/src/noether/scaffold/cli.py b/src/noether/scaffold/cli.py new file mode 100644 index 00000000..d9ecf924 --- /dev/null +++ b/src/noether/scaffold/cli.py @@ -0,0 +1,99 @@ +# Copyright © 2025 Emmi AI GmbH. All rights reserved. 
app = typer.Typer(
    name="noether-init",
    help="Scaffold a new Noether training project.",
    add_completion=False,
)


# NOTE: the docstring of ``main`` is deliberately kept to a single line; further
# documentation lives in comments so the runtime-visible text stays unchanged.
@app.command()
def main(
    project_name: Annotated[str, typer.Argument(help="Project name (valid Python identifier)")],
    model: Annotated[ModelChoice, typer.Option("--model", "-m", help="Model architecture")] = ...,  # type: ignore[assignment]
    dataset: Annotated[DatasetChoice, typer.Option("--dataset", "-d", help="Dataset")] = ...,  # type: ignore[assignment]
    dataset_path: Annotated[str, typer.Option("--dataset-path", help="Path to dataset")] = ...,  # type: ignore[assignment]
    optimizer: Annotated[OptimizerChoice, typer.Option("--optimizer", "-o", help="Optimizer")] = OptimizerChoice.ADAMW,
    tracker: Annotated[
        TrackerChoice, typer.Option("--tracker", "-t", help="Experiment tracker")
    ] = TrackerChoice.DISABLED,
    hardware: Annotated[HardwareChoice, typer.Option("--hardware", help="Hardware target")] = HardwareChoice.GPU,
    project_dir: Annotated[Path, typer.Option("--project-dir", "-l", help="Where to create project dir")] = Path("."),
    wandb_entity: Annotated[
        str | None, typer.Option("--wandb-entity", help="W&B entity (required if tracker=wandb)")
    ] = None,
) -> None:
    """Scaffold a new Noether training project."""
    # Local import: only needed for the project-name validation below.
    import keyword

    # Validate project name
    if not project_name.isidentifier():
        typer.echo(f"Error: '{project_name}' is not a valid Python identifier.", err=True)
        raise typer.Exit(1)

    # str.isidentifier() also returns True for reserved words ("class", "import", ...).
    # The project name is substituted into import statements of the generated files,
    # so a keyword would produce a project that cannot be imported.
    if keyword.iskeyword(project_name):
        typer.echo(
            f"Error: '{project_name}' is a reserved Python keyword and cannot be used as a project name.",
            err=True,
        )
        raise typer.Exit(1)

    # Validate if wandb has entity set
    if tracker == TrackerChoice.WANDB and not wandb_entity:
        typer.echo("Error: --wandb-entity is required when --tracker=wandb.", err=True)
        raise typer.Exit(1)

    # Resolve to absolute path; the project lives in a sub-directory named after it.
    project_dir = (project_dir / project_name).resolve()

    # Refuse to write into an existing directory.
    if project_dir.exists():
        typer.echo(f"Error: Directory already exists: {project_dir}", err=True)
        raise typer.Exit(1)

    # Build config
    config = resolve_config(
        project_name=project_name,
        model=model,
        dataset=dataset,
        dataset_path=dataset_path,
        optimizer=optimizer,
        tracker=tracker,
        hardware=hardware,
        project_dir=project_dir,
        wandb_entity=wandb_entity,
    )

    # Generate
    typer.echo(f"Creating project '{project_name}' at {project_dir}")
    generate_project(config)

    # Print summary
    _print_summary(config)


def _print_summary(config: ScaffoldConfig) -> None:
    """Echo the chosen configuration and a suggested training command."""
    typer.echo("")
    typer.echo("Project created successfully!")
    typer.echo("")
    typer.echo("Configuration:")
    typer.echo(f"  Project:   {config.project_name}")
    typer.echo(f"  Model:     {config.model.value}")
    typer.echo(f"  Dataset:   {config.dataset.value}")
    typer.echo(f"  Optimizer: {config.optimizer.value}")
    typer.echo(f"  Tracker:   {config.tracker.value}")
    typer.echo(f"  Hardware:  {config.hardware.value}")
    typer.echo(f"  Path:      {config.project_dir}")
    typer.echo("")

    # Suggest run command
    typer.echo("To train, run:")
    typer.echo(f"  uv run noether-train --config-dir {config.project_dir}/configs \\")
    typer.echo(f"    --config-name train +experiment={config.model.value}")
    typer.echo("")
    typer.echo("Experiment configs for all models are in configs/experiment/.")
    typer.echo("")


if __name__ == "__main__":
    app()
@dataclass
class ScaffoldConfig:
    """User choices for one scaffolded project plus the dataset's reference mapping."""

    project_name: str
    model: ModelChoice
    dataset: DatasetChoice
    dataset_path: str
    optimizer: OptimizerChoice
    tracker: TrackerChoice
    hardware: HardwareChoice
    project_dir: Path
    wandb_entity: str | None

    # Resolved from reference YAML
    reference: dict[str, Any] = field(default_factory=dict)


def substitute(content: str, config: ScaffoldConfig) -> str:
    """Replace template placeholders with config values.

    The placeholders are replaced one after another in a fixed order:
    ``__PROJECT__``, ``__DATASET_PATH__``, ``__OPTIMIZER__``, ``__TRACKER__``.
    """
    replacements = (
        ("__PROJECT__", config.project_name),
        ("__DATASET_PATH__", config.dataset_path),
        ("__OPTIMIZER__", config.optimizer.value),
        ("__TRACKER__", config.tracker.value),
    )
    result = content
    for placeholder, value in replacements:
        result = result.replace(placeholder, value)
    return result


def load_reference(dataset: DatasetChoice) -> dict[str, Any]:
    """Load reference YAML for a dataset from package resources.

    Returns an empty dict when the YAML document is empty.
    """
    ref_files = importlib.resources.files("noether.scaffold.references")
    ref_path = ref_files / f"{dataset.value}.yaml"
    # Read with an explicit encoding so the result does not depend on the locale.
    loaded = yaml.safe_load(ref_path.read_text(encoding="utf-8"))
    # An empty document parses to None, and dict(None) would raise TypeError.
    return dict(loaded) if loaded else {}


def resolve_config(
    project_name: str,
    model: ModelChoice,
    dataset: DatasetChoice,
    dataset_path: str,
    optimizer: OptimizerChoice,
    tracker: TrackerChoice,
    hardware: HardwareChoice,
    project_dir: Path,
    wandb_entity: str | None,
) -> ScaffoldConfig:
    """Build a fully-resolved ScaffoldConfig, including the dataset's reference mapping."""
    return ScaffoldConfig(
        project_name=project_name,
        model=model,
        dataset=dataset,
        dataset_path=dataset_path,
        optimizer=optimizer,
        tracker=tracker,
        hardware=hardware,
        project_dir=project_dir,
        wandb_entity=wandb_entity,
        reference=load_reference(dataset),
    )


def _write(path: Path, content: str) -> None:
    """Write *content* to *path* as UTF-8, creating parent directories as needed."""
    path.parent.mkdir(parents=True, exist_ok=True)
    # Template files carry non-ASCII text (e.g. the copyright sign in their headers),
    # so the encoding is pinned instead of relying on the platform default.
    path.write_text(content, encoding="utf-8")


def _copy_template_with_substitution(
    template_file: Traversable, destination_path: Path, config: ScaffoldConfig
) -> None:
    """Copy a template file with placeholder substitution."""
    _write(destination_path, substitute(template_file.read_text(encoding="utf-8"), config))


def _copy_verbatim(template_file: Traversable, destination_path: Path) -> None:
    """Copy a template file verbatim (no substitution)."""
    _write(destination_path, template_file.read_text(encoding="utf-8"))


def _copy_relative(parts: tuple[str, ...], config: ScaffoldConfig, *, with_substitution: bool) -> None:
    """Copy the template at relative path *parts* to the same relative path in the project."""
    source: Traversable = TEMPLATES
    target = config.project_dir
    for part in parts:
        source = source / part
        target = target / part
    if with_substitution:
        _copy_template_with_substitution(source, target, config)
    else:
        _copy_verbatim(source, target)


def copy_python_files(config: ScaffoldConfig) -> None:
    """Copy template Python files into the new project with substitutions.

    Every file keeps its path relative to the template root, so the files are
    listed once as relative paths instead of as source/destination pairs.
    """
    substituted: tuple[tuple[str, ...], ...] = (
        # --- Model files (model-specific) ---
        ("model", "base.py"),
        ("schemas", "models", "base_config.py"),
        ("model", f"{config.model.module_name}.py"),
        ("schemas", "models", f"{config.model.schema_module}.py"),
        # --- Infrastructure files (with __PROJECT__ substitution) ---
        ("pipeline", "__init__.py"),
        ("pipeline", "collators", "__init__.py"),
        ("pipeline", "collators", "sparse_tensor_offset.py"),
        ("pipeline", "multistage_pipelines", "__init__.py"),
        ("pipeline", "multistage_pipelines", "aero_multistage.py"),
        ("pipeline", "sample_processors", "__init__.py"),
        ("pipeline", "sample_processors", "anchor_point_sampling.py"),
        ("trainers", "automotive_aerodynamics_cfd.py"),
        ("callbacks", "surface_volume_evaluation_metrics.py"),
        ("schemas", "datasets", "aero_dataset_config.py"),
        ("schemas", "pipelines", "aero_pipeline_config.py"),
        ("schemas", "trainers", "automotive_aerodynamics_trainer_config.py"),
        ("schemas", "callbacks", "callback_config.py"),
        ("schemas", "config_schema.py"),
    )
    # --- Static init files (verbatim copies) ---
    verbatim: tuple[tuple[str, ...], ...] = (
        ("callbacks", "__init__.py"),
        ("trainers", "__init__.py"),
        ("schemas", "__init__.py"),
        ("schemas", "datasets", "__init__.py"),
        ("schemas", "pipelines", "__init__.py"),
        ("schemas", "trainers", "__init__.py"),
        ("schemas", "callbacks", "__init__.py"),
    )

    for parts in substituted:
        _copy_relative(parts, config, with_substitution=True)
    for parts in verbatim:
        _copy_relative(parts, config, with_substitution=False)


def generate_python_files(config: ScaffoldConfig) -> None:
    """Generate dynamic Python files that depend on model choice."""
    proj = config.project_dir
    model = config.model

    # --- Empty __init__.py files ---
    _write(proj / "__init__.py", "")
    _write(proj / "configs" / "__init__.py", "")

    # --- schemas/models/any_model_config.py (depends on model choice) ---
    cfg_cls = model.config_class_name
    schema_mod = model.schema_module
    _write(
        proj / "schemas" / "models" / "any_model_config.py",
        f"from typing import Union\n\nfrom .{schema_mod} import {cfg_cls}\n\nAnyModelConfig = Union[{cfg_cls}]\n",
    )

    # --- schemas/models/__init__.py (depends on model choice) ---
    _write(
        proj / "schemas" / "models" / "__init__.py",
        f"from .{schema_mod} import {cfg_cls}\n",
    )

    # --- model/__init__.py (depends on model choice) ---
    _write(
        proj / "model" / "__init__.py",
        f"from .{model.module_name} import {model.class_name}\n",
    )


def copy_yaml_configs(config: ScaffoldConfig) -> None:
    """Copy all YAML config files into the new project.

    File names that come from the dataset reference mapping are optional:
    a missing or empty entry is skipped.
    """
    tpl = TEMPLATES / "configs"
    dst = config.project_dir / "configs"
    ref = config.reference

    # --- Verbatim copies (data_specs, normalizers, statistics, datasets, optimizer) ---
    verbatim = [
        ("data_specs", ref.get("data_specs_file")),
        ("dataset_normalizers", ref.get("normalizers_file")),
        ("dataset_statistics", ref.get("statistics_file")),
        ("datasets", ref.get("dataset_config_file")),
        ("optimizer", config.optimizer.value),
    ]
    for subdir, filename in verbatim:
        if filename:
            _copy_verbatim(tpl / subdir / f"{filename}.yaml", dst / subdir / f"{filename}.yaml")

    # --- With substitution (model, pipeline, trainer, callbacks, tracker, train) ---
    substituted = [
        ("model", config.model.value),
        ("pipeline", ref.get("pipeline_file")),
        ("trainer", ref.get("trainer_config_file")),
        ("callbacks", ref.get("callbacks_file")),
        ("tracker", config.tracker.value),
    ]
    for subdir, filename in substituted:
        if filename:
            _copy_template_with_substitution(
                tpl / subdir / f"{filename}.yaml", dst / subdir / f"{filename}.yaml", config
            )

    # --- Train YAML (per-dataset template) ---
    train_path = dst / "train.yaml"
    _copy_template_with_substitution(tpl / f"train_{config.dataset.value}.yaml", train_path, config)

    # Append accelerator for non-GPU hardware
    if config.hardware != HardwareChoice.GPU:
        content = train_path.read_text(encoding="utf-8")
        # Without a trailing newline the appended key would be glued onto the
        # template's last line and corrupt the YAML.
        if content and not content.endswith("\n"):
            content += "\n"
        train_path.write_text(content + f"accelerator: {config.hardware.value}\n", encoding="utf-8")

    # --- Experiment configs (all 4 models for the dataset's category) ---
    category = ref.get("experiment_category", "shapenet")
    for model in ModelChoice:
        experiment_yaml = f"{model.value}.yaml"
        _copy_template_with_substitution(
            tpl / "experiment" / category / experiment_yaml,
            dst / "experiment" / experiment_yaml,
            config,
        )
def generate_project(config: ScaffoldConfig) -> None:
    """Run every generation step for *config*, in a fixed order.

    The steps are: copy the template Python files, write the generated
    Python files, then copy the YAML configs.
    """
    steps = (copy_python_files, generate_python_files, copy_yaml_configs)
    for step in steps:
        step(config)
b/src/noether/scaffold/references/emmi_wing.yaml new file mode 100644 index 00000000..bd66d28c --- /dev/null +++ b/src/noether/scaffold/references/emmi_wing.yaml @@ -0,0 +1,8 @@ +experiment_category: emmi_wing +data_specs_file: emmi_wing +normalizers_file: wing_dataset_normalizers +statistics_file: wing_stats +pipeline_file: caeml_pipeline +dataset_config_file: caeml_dataset +trainer_config_file: caeml_trainer +callbacks_file: training_callbacks_caeml diff --git a/src/noether/scaffold/references/shapenet_car.yaml b/src/noether/scaffold/references/shapenet_car.yaml new file mode 100644 index 00000000..14ad8ba0 --- /dev/null +++ b/src/noether/scaffold/references/shapenet_car.yaml @@ -0,0 +1,8 @@ +experiment_category: shapenet +data_specs_file: shapenet_car +normalizers_file: shapenet_dataset_normalizers +statistics_file: shapenet_car_stats +pipeline_file: shapenet_pipeline +dataset_config_file: shapenet_dataset +trainer_config_file: shapenet_trainer +callbacks_file: training_callbacks_shapenet diff --git a/src/noether/scaffold/template_files/__init__.py b/src/noether/scaffold/template_files/__init__.py new file mode 100644 index 00000000..aede2f25 --- /dev/null +++ b/src/noether/scaffold/template_files/__init__.py @@ -0,0 +1 @@ +# Copyright © 2026 Emmi AI GmbH. All rights reserved. diff --git a/src/noether/scaffold/template_files/callbacks/__init__.py b/src/noether/scaffold/template_files/callbacks/__init__.py new file mode 100644 index 00000000..ab05a8fe --- /dev/null +++ b/src/noether/scaffold/template_files/callbacks/__init__.py @@ -0,0 +1,3 @@ +# Copyright © 2026 Emmi AI GmbH. All rights reserved. 
+
+from .surface_volume_evaluation_metrics import SurfaceVolumeEvaluationMetricsCallback
diff --git a/src/noether/scaffold/template_files/callbacks/surface_volume_evaluation_metrics.py b/src/noether/scaffold/template_files/callbacks/surface_volume_evaluation_metrics.py
new file mode 100644
index 00000000..11cec4ab
--- /dev/null
+++ b/src/noether/scaffold/template_files/callbacks/surface_volume_evaluation_metrics.py
@@ -0,0 +1,314 @@
+# Copyright © 2025 Emmi AI GmbH. All rights reserved.
+
+from collections import defaultdict
+
+import torch
+from __PROJECT__.schemas.callbacks import SurfaceVolumeEvaluationMetricsCallbackConfig
+
+from noether.core.callbacks.periodic import PeriodicDataIteratorCallback
+
+# Constants
+DEFAULT_EVALUATION_MODES = [
+    "surface_pressure",
+    "surface_friction",
+    "volume_velocity",
+    "volume_pressure",
+    "volume_vorticity",
+]
+
+METRIC_SUFFIX_TARGET = "_target"
+METRIC_PREFIX_LOSS = "loss/"
+
+
+class MetricType:
+    """Metric type identifiers."""
+
+    MSE = "mse"
+    MAE = "mae"
+    L2ERR = "l2err"
+
+
+class SurfaceVolumeEvaluationMetricsCallback(PeriodicDataIteratorCallback):
+    """
+    Callback for computing evaluation metrics on surface and volume predictions.
+
+    This callback periodically evaluates model performance by computing MSE, MAE,
+    and L2 error metrics for various physical fields (pressure, velocity, friction, etc.).
+    Supports both standard and chunked inference for memory efficiency.
+
+    Args:
+        callback_config: Configuration for the callback including dataset key,
+            forward properties, and chunking settings
+        **kwargs: Additional arguments passed to parent class
+
+    Attributes:
+        dataset_key: Identifier for the dataset to evaluate
+        evaluation_modes: List of field names to evaluate
+        dataset_normalizers: Normalizers for denormalizing predictions
+        forward_properties: Properties to pass to model forward
+        chunked_inference: Whether to use chunked inference
+        chunk_properties: Properties to chunk
+        chunk_size: Size of each chunk
+        sample_size_property: Batch key whose second dimension gives the number of points to chunk
+    """
+
+    def __init__(self, callback_config: SurfaceVolumeEvaluationMetricsCallbackConfig, **kwargs):
+        super().__init__(callback_config, **kwargs)
+
+        self._config = callback_config
+        self.dataset_key = callback_config.dataset_key
+        # Copy so that per-instance edits never mutate the shared module-level default.
+        self.evaluation_modes = list(DEFAULT_EVALUATION_MODES)
+        self.dataset_normalizers = self.data_container.get_dataset(self.dataset_key).normalizers
+        self.forward_properties = callback_config.forward_properties
+        self.chunked_inference = callback_config.chunked_inference
+        self.chunk_properties = callback_config.chunk_properties
+        self.chunk_size = callback_config.chunk_size
+        self.sample_size_property = callback_config.sample_size_property
+
+    def _denormalize(
+        self, predictions: torch.Tensor, targets: torch.Tensor, key: str
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """
+        Denormalize predictions and targets using the appropriate normalizer.
+
+        This method finds the specific normalizer for the given key and uses it to denormalize,
+        instead of calling pipeline.denormalize which would process the entire pipeline.
+
+        Args:
+            predictions: Tensor containing the predictions to denormalize
+            targets: Tensor containing the targets to denormalize
+            key: Key to identify the normalizer for denormalization
+
+        Returns:
+            Tuple of (denormalized_predictions, denormalized_targets)
+
+        Raises:
+            KeyError: If no normalizer is found for the given key
+        """
+        try:
+            normalizer = self.dataset_normalizers[key]
+        except KeyError as e:
+            raise KeyError(
+                f"No normalizer found for key '{key}'. Available normalizers: {list(self.dataset_normalizers.keys())}"
+            ) from e
+
+        denormalized_predictions = normalizer.inverse(predictions.cpu())
+        denormalized_targets = normalizer.inverse(targets.cpu())
+        return denormalized_predictions, denormalized_targets
+
+    def _compute_metrics(
+        self, denormalized_predictions: torch.Tensor, denormalized_targets: torch.Tensor, field_name: str
+    ) -> dict[str, torch.Tensor]:
+        """
+        Compute evaluation metrics for predictions vs targets.
+
+        Calculates Mean Squared Error (MSE), Mean Absolute Error (MAE),
+        and relative L2 error for the given field.
+
+        Args:
+            denormalized_predictions: Denormalized prediction tensor
+            denormalized_targets: Denormalized target tensor
+            field_name: Name of the field being evaluated (used for metric naming)
+
+        Returns:
+            Dictionary mapping metric names to computed values
+        """
+        delta = denormalized_predictions - denormalized_targets
+
+        metrics = {
+            f"{field_name}_{MetricType.MSE}": (delta**2).mean(),
+            f"{field_name}_{MetricType.MAE}": delta.abs().mean(),
+        }
+
+        # L2 relative error (avoid division by zero)
+        target_norm = denormalized_targets.norm()
+        if target_norm > 1e-8:
+            metrics[f"{field_name}_{MetricType.L2ERR}"] = delta.norm() / target_norm
+        else:
+            self.logger.warning(f"Target norm too small for {field_name}, skipping L2 error")
+
+        return metrics
+
+    def _create_chunked_batch(
+        self, batch: dict[str, torch.Tensor], start_idx: int, end_idx: int
+    ) -> dict[str, torch.Tensor]:
+        """
+        Create a batch slice for chunked processing.
+
+        Args:
+            batch: Full batch dictionary
+            start_idx: Start index for the chunk
+            end_idx: End index for the chunk
+
+        Returns:
+            Dictionary with chunked tensors for specified properties
+        """
+        chunked_batch = {}
+        for key, value in batch.items():
+            if key in self.chunk_properties:
+                chunked_batch[key] = value[:, start_idx:end_idx]
+            else:
+                chunked_batch[key] = value
+        return chunked_batch
+
+    def _get_chunk_indices(self, batch_size: int) -> list[tuple[int, int]]:
+        """
+        Calculate start and end indices for all chunks.
+
+        The final chunk may be shorter than ``chunk_size`` so that every point is
+        covered, including inputs that are smaller than a single chunk.
+
+        Args:
+            batch_size: Total size of the batch to chunk
+
+        Returns:
+            List of (start_idx, end_idx) tuples for each chunk
+
+        Raises:
+            ValueError: If ``chunk_size`` is not a positive integer
+        """
+        if self.chunk_size is None or self.chunk_size <= 0:
+            raise ValueError(f"chunk_size must be a positive integer, got {self.chunk_size!r}")
+
+        return [
+            (start, min(start + self.chunk_size, batch_size))
+            for start in range(0, batch_size, self.chunk_size)
+        ]
+
+    def _chunked_model_inference(self, batch: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
+        """
+        Run model inference in chunks to reduce memory usage.
+
+        Splits the batch into smaller chunks, processes each independently,
+        and concatenates the results.
+
+        Args:
+            batch: Full batch dictionary
+
+        Returns:
+            Dictionary of model outputs with concatenated chunk results
+        """
+
+        batch_size = batch[self.sample_size_property].shape[1]
+        chunk_indices = self._get_chunk_indices(batch_size)
+
+        model_outputs = defaultdict(list)
+        for start_idx, end_idx in chunk_indices:
+            chunked_batch = self._create_chunked_batch(batch, start_idx, end_idx)
+            forward_inputs = {k: v for k, v in chunked_batch.items() if k in self.forward_properties}
+
+            with self.trainer.autocast_context:
+                chunked_outputs = self.model(**forward_inputs)
+
+            # Accumulate outputs
+            for key, value in chunked_outputs.items():
+                model_outputs[key].append(value)
+
+        # Concatenate all chunks
+        return {key: torch.cat(chunks, dim=1) for key, chunks in model_outputs.items()}
+
+    def _run_model_inference(self, batch: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
+        """
+        Run model inference, optionally in chunks.
+
+        Args:
+            batch: Input batch dictionary
+
+        Returns:
+            Dictionary of model outputs
+        """
+        if self.chunked_inference:
+            return self._chunked_model_inference(batch)
+        else:
+            forward_inputs = {k: v for k, v in batch.items() if k in self.forward_properties}
+            with self.trainer.autocast_context:
+                return self.model(**forward_inputs)
+
+    def _align_chunk_sizes(self, prediction: torch.Tensor, target: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+        """
+        Align prediction and target sizes when using chunked inference.
+
+        Args:
+            prediction: Prediction tensor
+            target: Target tensor
+
+        Returns:
+            Tuple of (aligned_prediction, aligned_target)
+        """
+        if self.chunked_inference and prediction.shape[1] != target.shape[1]:
+            min_size = min(prediction.shape[1], target.shape[1])
+            prediction = prediction[:, :min_size]
+            target = target[:, :min_size]
+        return prediction, target
+
+    def _compute_mode_metrics(
+        self, batch: dict[str, torch.Tensor], model_outputs: dict[str, torch.Tensor], mode: str
+    ) -> dict[str, torch.Tensor]:
+        """
+        Compute metrics for a specific evaluation mode.
+
+        Args:
+            batch: Input batch containing targets
+            model_outputs: Model predictions
+            mode: Evaluation mode (field name)
+
+        Returns:
+            Dictionary of computed metrics for this mode
+        """
+        target = batch.get(f"{mode}{METRIC_SUFFIX_TARGET}")
+        prediction = model_outputs.get(mode)
+
+        if prediction is None or target is None:
+            return {}
+
+        # Denormalize
+        denorm_pred, denorm_target = self._denormalize(prediction, target, mode)
+
+        # Align sizes if needed
+        denorm_pred, denorm_target = self._align_chunk_sizes(denorm_pred, denorm_target)
+
+        # Compute metrics
+        return self._compute_metrics(denorm_pred, denorm_target, mode)
+
+    def process_data(self, batch: dict[str, torch.Tensor], **_) -> dict[str, torch.Tensor]:
+        """
+        Execute forward pass and compute metrics.
+
+        Args:
+            batch: Input batch dictionary
+            **_: Additional unused arguments
+
+        Returns:
+            Dictionary mapping metric names to computed values
+        """
+        model_outputs = self._run_model_inference(batch)
+
+        metrics = {}
+        for mode in self.evaluation_modes:
+            metrics.update(self._compute_mode_metrics(batch, model_outputs, mode))
+
+        return metrics
+
+    def process_results(self, results: dict[str, torch.Tensor], **_) -> None:
+        """
+        Log computed metrics to writer.
+
+        Args:
+            results: Dictionary of computed metrics
+            **_: Additional unused arguments
+        """
+        if not results:
+            self.logger.warning(f"No metrics computed for dataset '{self.dataset_key}'")
+            return
+
+        for name, metric in results.items():
+            metric_key = f"{METRIC_PREFIX_LOSS}{self.dataset_key}/{name}"
+            self.writer.add_scalar(
+                key=metric_key,
+                value=metric.mean(),
+                logger=self.logger,
+                format_str=".6f",
+            )
+
+        self.logger.debug(f"Logged {len(results)} metrics for dataset '{self.dataset_key}'")
diff --git a/src/noether/scaffold/template_files/configs/callbacks/training_callbacks_caeml.yaml b/src/noether/scaffold/template_files/configs/callbacks/training_callbacks_caeml.yaml
new file mode 100644
index 00000000..941127c4
--- /dev/null
+++ b/src/noether/scaffold/template_files/configs/callbacks/training_callbacks_caeml.yaml
@@ -0,0 +1,45 @@
+- kind: noether.core.callbacks.CheckpointCallback
+  every_n_epochs: 10
+  save_weights: false
+  save_latest_weights: true
+  save_latest_optim: true
+  name: CheckpointCallback
+# validation loss
+- kind: noether.training.callbacks.OfflineLossCallback
+  batch_size: 1
+  every_n_epochs: 1
+  dataset_key: val
+  name: OfflineLossCallback
+- kind: noether.core.callbacks.BestCheckpointCallback
+  every_n_epochs: 1
+  metric_key: loss/val/total
+  name: BestCheckpointCallback
+# test loss
+- kind: __PROJECT__.callbacks.SurfaceVolumeEvaluationMetricsCallback
+  batch_size: 1
+  every_n_epochs: 1
+  dataset_key: val
+  forward_properties: ${model.forward_properties}
+  name: SurfaceVolumeEvaluationMetricsCallback
+- kind: __PROJECT__.callbacks.SurfaceVolumeEvaluationMetricsCallback
+  batch_size: 1
+  every_n_epochs: ${trainer.max_epochs}
+  dataset_key: chunked_test
+  name: SurfaceVolumeEvaluationMetricsCallback
+  forward_properties: ${model.forward_properties}
+  chunked_inference: true
+  chunk_properties: ${chunk_properties}
+  sample_size_property: ${sample_size_property}
+  chunk_size: ${chunk_size}
+- kind: noether.core.callbacks.EmaCallback
+
every_n_epochs: 10 + save_weights: false + save_last_weights: false + save_latest_weights: true + target_factors: + - 0.9999 + name: EmaCallback + # example of how to save/load only specific submodules of a composite model + # model_paths: + # - low_level_blocks + # - high_level_blocks diff --git a/src/noether/scaffold/template_files/configs/callbacks/training_callbacks_shapenet.yaml b/src/noether/scaffold/template_files/configs/callbacks/training_callbacks_shapenet.yaml new file mode 100644 index 00000000..76875bc1 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/callbacks/training_callbacks_shapenet.yaml @@ -0,0 +1,39 @@ +- kind: noether.core.callbacks.CheckpointCallback + every_n_epochs: 10 + save_weights: false + save_latest_weights: true + save_latest_optim: true + save_optim: false + name: CheckpointCallback +# validation loss +- kind: noether.training.callbacks.OfflineLossCallback + batch_size: 1 + every_n_epochs: 1 + dataset_key: test + name: OfflineLossCallback +- kind: noether.core.callbacks.BestCheckpointCallback + every_n_epochs: 1 + metric_key: loss/test/total + name: BestCheckpointCallback +# test loss +- kind: __PROJECT__.callbacks.SurfaceVolumeEvaluationMetricsCallback + batch_size: 1 + every_n_epochs: 1 + dataset_key: test + name: SurfaceVolumeEvaluationMetricsCallback + forward_properties: ${model.forward_properties} +- kind: __PROJECT__.callbacks.SurfaceVolumeEvaluationMetricsCallback + batch_size: 1 + every_n_epochs: ${trainer.max_epochs} + dataset_key: test_repeat + name: SurfaceVolumeEvaluationMetricsCallback + forward_properties: ${model.forward_properties} +#ema +- kind: noether.core.callbacks.EmaCallback + every_n_epochs: 10 + save_weights: false + save_last_weights: false + save_latest_weights: true + target_factors: + - 0.9999 + name: EmaCallback diff --git a/src/noether/scaffold/template_files/configs/data_specs/caeml.yaml b/src/noether/scaffold/template_files/configs/data_specs/caeml.yaml new file mode 100644 index 
00000000..9fcd1606 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/data_specs/caeml.yaml @@ -0,0 +1,10 @@ + +# caeml data specifications are for both AhmedML and DrivAerML datasets +position_dim: 3 +surface_output_dims: + pressure: 1 + friction: 3 +volume_output_dims: + pressure: 1 + velocity: 3 + vorticity: 3 \ No newline at end of file diff --git a/src/noether/scaffold/template_files/configs/data_specs/emmi_wing.yaml b/src/noether/scaffold/template_files/configs/data_specs/emmi_wing.yaml new file mode 100644 index 00000000..c6c1673b --- /dev/null +++ b/src/noether/scaffold/template_files/configs/data_specs/emmi_wing.yaml @@ -0,0 +1,12 @@ + +position_dim: 3 +surface_output_dims: + pressure: 1 + friction: 3 +volume_output_dims: + pressure: 1 + velocity: 3 + vorticity: 3 +conditioning_dims: + geometry_design_parameters: 5 + inflow_design_parameters: 2 \ No newline at end of file diff --git a/src/noether/scaffold/template_files/configs/data_specs/shapenet_car.yaml b/src/noether/scaffold/template_files/configs/data_specs/shapenet_car.yaml new file mode 100644 index 00000000..b131fdd9 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/data_specs/shapenet_car.yaml @@ -0,0 +1,13 @@ + +position_dim: 3 +surface_feature_dim: + surface_sdf: 1 + surface_normals: 3 +volume_feature_dim: + volume_sdf: 1 + volume_normals: 3 +surface_output_dims: + pressure: 1 +volume_output_dims: + velocity: 3 +use_physics_features: ${trainer.use_physics_features} \ No newline at end of file diff --git a/src/noether/scaffold/template_files/configs/dataset_normalizers/caeml_dataset_normalizers.yaml b/src/noether/scaffold/template_files/configs/dataset_normalizers/caeml_dataset_normalizers.yaml new file mode 100644 index 00000000..21c4e31d --- /dev/null +++ b/src/noether/scaffold/template_files/configs/dataset_normalizers/caeml_dataset_normalizers.yaml @@ -0,0 +1,31 @@ +surface_position: + kind: noether.data.preprocessors.normalizers.PositionNormalizer + raw_pos_min: 
${dataset_statistics.raw_pos_min} + raw_pos_max: ${dataset_statistics.raw_pos_max} + scale: 1000 +surface_pressure: + kind: noether.data.preprocessors.normalizers.MeanStdNormalization + mean: ${dataset_statistics.surface_pressure_mean} + std: ${dataset_statistics.surface_pressure_std} +surface_friction: + kind: noether.data.preprocessors.normalizers.MeanStdNormalization + mean: ${dataset_statistics.surface_friction_mean} + std: ${dataset_statistics.surface_friction_std} +volume_position: + kind: noether.data.preprocessors.normalizers.PositionNormalizer + raw_pos_min: ${dataset_statistics.raw_pos_min} + raw_pos_max: ${dataset_statistics.raw_pos_max} + scale: 1000 +volume_vorticity: + kind: noether.data.preprocessors.normalizers.MeanStdNormalization + mean: ${dataset_statistics.volume_vorticity_logscale_mean} + std: ${dataset_statistics.volume_vorticity_logscale_std} + logscale: true +volume_velocity: + kind: noether.data.preprocessors.normalizers.MeanStdNormalization + mean: ${dataset_statistics.volume_velocity_mean} + std: ${dataset_statistics.volume_velocity_std} +volume_pressure: + kind: noether.data.preprocessors.normalizers.MeanStdNormalization + mean: ${dataset_statistics.volume_pressure_mean} + std: ${dataset_statistics.volume_pressure_std} \ No newline at end of file diff --git a/src/noether/scaffold/template_files/configs/dataset_normalizers/shapenet_dataset_normalizers.yaml b/src/noether/scaffold/template_files/configs/dataset_normalizers/shapenet_dataset_normalizers.yaml new file mode 100644 index 00000000..8a128e5f --- /dev/null +++ b/src/noether/scaffold/template_files/configs/dataset_normalizers/shapenet_dataset_normalizers.yaml @@ -0,0 +1,22 @@ +surface_pressure: + kind: noether.data.preprocessors.normalizers.MeanStdNormalization + mean: ${dataset_statistics.surface_pressure_mean} + std: ${dataset_statistics.surface_pressure_std} +volume_velocity: + kind: noether.data.preprocessors.normalizers.MeanStdNormalization + mean: 
${dataset_statistics.volume_velocity_mean} + std: ${dataset_statistics.volume_velocity_std} +volume_sdf: + kind: noether.data.preprocessors.normalizers.MeanStdNormalization + mean: ${dataset_statistics.volume_sdf_mean} + std: ${dataset_statistics.volume_sdf_std} +surface_position: + kind: noether.data.preprocessors.normalizers.PositionNormalizer + raw_pos_min: ${dataset_statistics.raw_pos_min} + raw_pos_max: ${dataset_statistics.raw_pos_max} + scale: 1000 +volume_position: + kind: noether.data.preprocessors.normalizers.PositionNormalizer + raw_pos_min: ${dataset_statistics.raw_pos_min} + raw_pos_max: ${dataset_statistics.raw_pos_max} + scale: 1000 \ No newline at end of file diff --git a/src/noether/scaffold/template_files/configs/dataset_normalizers/wing_dataset_normalizers.yaml b/src/noether/scaffold/template_files/configs/dataset_normalizers/wing_dataset_normalizers.yaml new file mode 100644 index 00000000..c0ca7fbb --- /dev/null +++ b/src/noether/scaffold/template_files/configs/dataset_normalizers/wing_dataset_normalizers.yaml @@ -0,0 +1,38 @@ +surface_position: + - kind: noether.data.preprocessors.normalizers.PositionNormalizer + raw_pos_min: ${dataset_statistics.raw_pos_min} + raw_pos_max: ${dataset_statistics.raw_pos_max} + scale: 1000 +surface_pressure: + - kind: noether.data.preprocessors.normalizers.MeanStdNormalization + mean: ${dataset_statistics.surface_pressure_mean} + std: ${dataset_statistics.surface_pressure_std} +surface_friction: + - kind: noether.data.preprocessors.normalizers.MeanStdNormalization + mean: ${dataset_statistics.surface_friction_mean} + std: ${dataset_statistics.surface_friction_std} +volume_position: + - kind: noether.data.preprocessors.normalizers.PositionNormalizer + raw_pos_min: ${dataset_statistics.raw_pos_min} + raw_pos_max: ${dataset_statistics.raw_pos_max} + scale: 1000 +volume_vorticity: + - kind: noether.data.preprocessors.normalizers.MeanStdNormalization + mean: 0.0 + std: 
${dataset_statistics.volume_vorticity_magnitude_mean} +volume_velocity: + - kind: noether.data.preprocessors.normalizers.MeanStdNormalization + mean: ${dataset_statistics.volume_velocity_mean} + std: ${dataset_statistics.volume_velocity_std} +volume_pressure: + - kind: noether.data.preprocessors.normalizers.MeanStdNormalization + mean: ${dataset_statistics.volume_pressure_mean} + std: ${dataset_statistics.volume_pressure_std} +geometry_design_parameters: + - kind: noether.data.preprocessors.normalizers.MeanStdNormalization + mean: ${dataset_statistics.geometry_design_parameters_mean} + std: ${dataset_statistics.geometry_design_parameters_std} +inflow_design_parameters: + - kind: noether.data.preprocessors.normalizers.MeanStdNormalization + mean: ${dataset_statistics.inflow_design_parameters_mean} + std: ${dataset_statistics.inflow_design_parameters_std} diff --git a/src/noether/scaffold/template_files/configs/dataset_statistics/ahmedml_stats.yaml b/src/noether/scaffold/template_files/configs/dataset_statistics/ahmedml_stats.yaml new file mode 100644 index 00000000..eec5b804 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/dataset_statistics/ahmedml_stats.yaml @@ -0,0 +1,12 @@ +raw_pos_min: [-4.0,] +raw_pos_max: [6.0,] +surface_pressure_mean: [-1.00952e-01] +surface_pressure_std: [1.88242e-01] +surface_friction_mean: [-1.52900e-03, 7.83792e-09, -5.82453e-05] +surface_friction_std: [1.17512e-03, 6.52266e-04, 7.13125e-04] +volume_velocity_mean: [8.74600e-01, 1.42877e-05, 7.76145e-03] +volume_velocity_std: [3.00305e-01, 1.14927e-01, 1.24698e-01] +volume_pressure_mean: [8.12013e-01] +volume_pressure_std: [3.67992e-01] +volume_vorticity_logscale_mean: [-1.45271e-04, 1.29314e-01, 1.29501e-05] +volume_vorticity_logscale_std: [1.11017e00, 1.96530e00, 1.77495e00] \ No newline at end of file diff --git a/src/noether/scaffold/template_files/configs/dataset_statistics/drivaerml_stats.yaml 
b/src/noether/scaffold/template_files/configs/dataset_statistics/drivaerml_stats.yaml new file mode 100644 index 00000000..78939dc1 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/dataset_statistics/drivaerml_stats.yaml @@ -0,0 +1,12 @@ +raw_pos_min: [-40.0,] +raw_pos_max: [80.0,] +surface_pressure_mean: [-2.29772e02,] +surface_pressure_std: [2.69345e02,] +surface_friction_mean: [-1.20054e00, 1.49358e-03, -7.20107e-02] +surface_friction_std: [2.07670e00, 1.35628e00, 1.11426e00] +volume_velocity_mean: [1.67909e01, -3.82238e-02, 4.07968e-01] +volume_velocity_std: [1.64115e01, 8.63614e00, 6.64996e00] +volume_pressure_mean: [1.71387e-01,] +volume_pressure_std: [5.00826e-01,] +volume_vorticity_logscale_mean: [-1.47814e-02, 7.87642e-01, 2.81023e-03] +volume_vorticity_logscale_std: [5.45681e00, 5.77081e00, 5.46175e00] \ No newline at end of file diff --git a/src/noether/scaffold/template_files/configs/dataset_statistics/drivaernet++_stats.yaml b/src/noether/scaffold/template_files/configs/dataset_statistics/drivaernet++_stats.yaml new file mode 100644 index 00000000..939a1b49 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/dataset_statistics/drivaernet++_stats.yaml @@ -0,0 +1,24 @@ +raw_pos_min: [-12.01] +raw_pos_max: [6.41,] +surface_domain_min: [-1.15154e00, -1.19442e00, 0.00000e00] +surface_domain_max: [4.20453e00, 1.19442e00, 1.76205e00] +surface_pos_mean: [1.53702e00, -8.12572e-03, 5.93542e-01] +surface_pos_std: [1.39704e00, 6.44139e-01, 3.88172e-01] +surface_pressure_mean: [-9.34098e01] +surface_pressure_std: [1.20787e02] +surface_friction_mean: [-6.71649e-01, 3.63487e-02, -8.46379e-02] +surface_friction_std: [8.19941e-01, 4.51045e-01, 7.81055e-01] +volume_pos_mean: [2.17577e00, -3.35840e-01, 7.60331e-01] +volume_pos_std: [3.06283e00, 7.20275e-01, 6.50708e-01] +volume_pressure_mean: [-6.24053e01] +volume_pressure_std: [9.42394e01] +volume_velocity_mean: [2.18719e01, -2.37778e-01, 6.73902e-01] +volume_velocity_std: [1.21079e01, 
3.97768e00, 3.90113e00] +volume_vorticity_mean: [2.24115e00, 3.02183e01, 2.58549e01] +volume_vorticity_std: [1.25151e05, 1.50186e05, 2.33779e05] +volume_vorticity_logscale_mean: [2.57623e-02, 2.58335e-01, 4.29835e-01] +volume_vorticity_logscale_std: [3.00179e00, 3.65020e00, 3.33356e00] +volume_vorticity_magnitude_mean: 3.03779e02 +volume_vorticity_magnitude_std: 3.04748e05 +volume_domain_min: [-1.20012e01, -4.00040e00, -3.46979e-18] +volume_domain_max: [3.20032e01, 4.00040e00, 6.40064e00] \ No newline at end of file diff --git a/src/noether/scaffold/template_files/configs/dataset_statistics/shapenet_car_stats.yaml b/src/noether/scaffold/template_files/configs/dataset_statistics/shapenet_car_stats.yaml new file mode 100644 index 00000000..03eb8c69 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/dataset_statistics/shapenet_car_stats.yaml @@ -0,0 +1,8 @@ +raw_pos_min: [-4.5,] +raw_pos_max: [6.0,] +surface_pressure_mean: [-36.4098] +surface_pressure_std: [48.6757] +volume_velocity_mean: [0.00293915, -0.0230546, 17.546032] +volume_velocity_std: [1.361689, 1.267649, 5.850353] +volume_sdf_mean: [3.74222e-01] +volume_sdf_std: [1.78948e-01] \ No newline at end of file diff --git a/src/noether/scaffold/template_files/configs/dataset_statistics/wing_stats.yaml b/src/noether/scaffold/template_files/configs/dataset_statistics/wing_stats.yaml new file mode 100644 index 00000000..ef42ed4c --- /dev/null +++ b/src/noether/scaffold/template_files/configs/dataset_statistics/wing_stats.yaml @@ -0,0 +1,21 @@ + +raw_pos_min: [-17.5] +raw_pos_max: [17.5] + +surface_pressure_mean: [92656.34610807039] +surface_pressure_std: [11929.058756240694] +surface_friction_mean: [-74.10092405045339,-0.5525946509854017,0.0401677695420727] +surface_friction_std: [47.16838501471528,10.233076648224564,23.08224849769229] +volume_velocity_mean: [187.92724405048926,0.5335961966484881,-0.0812512160659759] +volume_velocity_std: [83.6810800019851,19.911990565773156,33.3370080829507] 
+volume_pressure_mean: [93342.81261991762] +volume_pressure_std: [11743.515250769764] + +volume_vorticity_logscale_mean: [-0.013138919851553849, 0.0033505699708222037, -1.6626923006758065] +volume_vorticity_logscale_std: [6.217698713293661,8.787798800793741,5.512463961862133] +volume_vorticity_magnitude_mean: 3.79027e+04 + +geometry_design_parameters_mean: [0.9495975525165955,1.2492765187383101,0.5500557258109463,19.92001709415523,0] +geometry_design_parameters_std: [0.14422534397647097,0.14465871522018967,0.08706049742886027,11.557458965186635,1] +inflow_design_parameters_mean: [224.59267997259747,-0.045877170231132774] +inflow_design_parameters_std: [43.25454930960363,5.792067554535914] \ No newline at end of file diff --git a/src/noether/scaffold/template_files/configs/datasets/caeml_dataset.yaml b/src/noether/scaffold/template_files/configs/datasets/caeml_dataset.yaml new file mode 100644 index 00000000..d14c2db1 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/datasets/caeml_dataset.yaml @@ -0,0 +1,31 @@ +train: + root: ${dataset_root} + kind: ${dataset_kind} + split: train + pipeline: ${pipeline} + dataset_normalizers: ${dataset_normalizers} + excluded_properties: ${excluded_properties} +val: + root: ${dataset_root} + kind: ${dataset_kind} + split: val + pipeline: ${pipeline} + dataset_normalizers: ${dataset_normalizers} + excluded_properties: ${excluded_properties} +test: + root: ${dataset_root} + kind: ${dataset_kind} + split: test + pipeline: ${pipeline} + dataset_normalizers: ${dataset_normalizers} + excluded_properties: ${excluded_properties} +chunked_test: + root: ${dataset_root} + kind: ${dataset_kind} + split: test + pipeline: ${inference_pipeline} + dataset_normalizers: ${dataset_normalizers} + excluded_properties: ${excluded_properties} + dataset_wrappers: + - kind: noether.data.base.wrappers.RepeatWrapper + repetitions: 10 \ No newline at end of file diff --git 
a/src/noether/scaffold/template_files/configs/datasets/shapenet_dataset.yaml b/src/noether/scaffold/template_files/configs/datasets/shapenet_dataset.yaml new file mode 100644 index 00000000..0f68631e --- /dev/null +++ b/src/noether/scaffold/template_files/configs/datasets/shapenet_dataset.yaml @@ -0,0 +1,25 @@ +train: + root: ${dataset_root} + kind: ${dataset_kind} + split: train + pipeline: ${pipeline} + dataset_normalizers: ${dataset_normalizers} + excluded_properties: ${excluded_properties} +test: + root: ${dataset_root} + kind: ${dataset_kind} + split: test + pipeline: ${pipeline} + dataset_normalizers: ${dataset_normalizers} + excluded_properties: ${excluded_properties} +test_repeat: + root: ${dataset_root} + kind: ${dataset_kind} + split: test + pipeline: ${pipeline} + dataset_normalizers: ${dataset_normalizers} + excluded_properties: ${excluded_properties} + dataset_wrappers: + - kind: noether.data.base.wrappers.RepeatWrapper + repetitions: 10 + \ No newline at end of file diff --git a/src/noether/scaffold/template_files/configs/experiment/caeml/ab_upt.yaml b/src/noether/scaffold/template_files/configs/experiment/caeml/ab_upt.yaml new file mode 100644 index 00000000..9efffaa0 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/experiment/caeml/ab_upt.yaml @@ -0,0 +1,28 @@ +# @package _global_ +defaults: + - override /model: ab_upt + - override /tracker: __TRACKER__ + +name: __PROJECT__-ab-upt + +model: + supernode_pooling_config: + radius: 0.25 + +pipeline: + num_geometry_points: 65536 + num_geometry_supernodes: 16384 + num_surface_anchor_points: 16384 + num_volume_anchor_points: 16384 + +inference_pipeline: + num_geometry_points: 65536 + num_geometry_supernodes: 16384 + num_surface_anchor_points: 1000000000 + num_volume_anchor_points: 1000000000 + +sample_size_property: surface_anchor_position +chunk_size: ${pipeline.num_surface_anchor_points} +chunk_properties: + - surface_anchor_position + - volume_anchor_position diff --git 
a/src/noether/scaffold/template_files/configs/experiment/caeml/transformer.yaml b/src/noether/scaffold/template_files/configs/experiment/caeml/transformer.yaml new file mode 100644 index 00000000..f8b0b847 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/experiment/caeml/transformer.yaml @@ -0,0 +1,16 @@ +# @package _global_ +defaults: + - override /model: transformer + - override /tracker: __TRACKER__ + +name: __PROJECT__-transformer + +inference_pipeline: + num_surface_points: 1000000000 + num_volume_points: 1000000000 + +sample_size_property: surface_position +chunk_size: ${pipeline.num_surface_points} +chunk_properties: + - surface_position + - volume_position diff --git a/src/noether/scaffold/template_files/configs/experiment/caeml/transolver.yaml b/src/noether/scaffold/template_files/configs/experiment/caeml/transolver.yaml new file mode 100644 index 00000000..68efd39f --- /dev/null +++ b/src/noether/scaffold/template_files/configs/experiment/caeml/transolver.yaml @@ -0,0 +1,16 @@ +# @package _global_ +defaults: + - override /model: transolver + - override /tracker: __TRACKER__ + +name: __PROJECT__-transolver + +inference_pipeline: + num_surface_points: 1000000000 + num_volume_points: 1000000000 + +sample_size_property: surface_position +chunk_size: ${pipeline.num_surface_points} +chunk_properties: + - surface_position + - volume_position diff --git a/src/noether/scaffold/template_files/configs/experiment/caeml/upt.yaml b/src/noether/scaffold/template_files/configs/experiment/caeml/upt.yaml new file mode 100644 index 00000000..457ad259 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/experiment/caeml/upt.yaml @@ -0,0 +1,28 @@ +# @package _global_ +defaults: + - override /model: upt + - override /tracker: __TRACKER__ + +name: __PROJECT__-upt + +model: + supernode_pooling_config: + radius: 0.25 + +pipeline: + num_supernodes: 16384 + num_surface_queries: 16384 + num_volume_queries: 16384 + num_surface_points: 65536 + 
+inference_pipeline: + num_supernodes: 16384 + num_surface_queries: 1000000000 + num_volume_queries: 1000000000 + num_surface_points: 65536 + +sample_size_property: surface_query_position +chunk_size: ${pipeline.num_surface_queries} +chunk_properties: + - surface_query_position + - volume_query_position diff --git a/src/noether/scaffold/template_files/configs/experiment/emmi_wing/ab_upt.yaml b/src/noether/scaffold/template_files/configs/experiment/emmi_wing/ab_upt.yaml new file mode 100644 index 00000000..8bf97b48 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/experiment/emmi_wing/ab_upt.yaml @@ -0,0 +1,36 @@ +# @package _global_ +defaults: + - override /model: ab_upt + - override /tracker: __TRACKER__ + +name: __PROJECT__-ab-upt + +model: + supernode_pooling_config: + radius: 0.25 + forward_properties: + - geometry_position + - geometry_supernode_idx + - geometry_batch_idx + - surface_anchor_position + - volume_anchor_position + - geometry_design_parameters + - inflow_design_parameters + +pipeline: + num_geometry_points: 65536 + num_geometry_supernodes: 16384 + num_surface_anchor_points: 16384 + num_volume_anchor_points: 16384 + +inference_pipeline: + num_geometry_points: 65536 + num_geometry_supernodes: 16384 + num_surface_anchor_points: 1000000000 + num_volume_anchor_points: 1000000000 + +sample_size_property: surface_anchor_position +chunk_size: ${pipeline.num_surface_anchor_points} +chunk_properties: + - surface_anchor_position + - volume_anchor_position diff --git a/src/noether/scaffold/template_files/configs/experiment/emmi_wing/transformer.yaml b/src/noether/scaffold/template_files/configs/experiment/emmi_wing/transformer.yaml new file mode 100644 index 00000000..f8b0b847 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/experiment/emmi_wing/transformer.yaml @@ -0,0 +1,16 @@ +# @package _global_ +defaults: + - override /model: transformer + - override /tracker: __TRACKER__ + +name: __PROJECT__-transformer + 
+inference_pipeline: + num_surface_points: 1000000000 + num_volume_points: 1000000000 + +sample_size_property: surface_position +chunk_size: ${pipeline.num_surface_points} +chunk_properties: + - surface_position + - volume_position diff --git a/src/noether/scaffold/template_files/configs/experiment/emmi_wing/transolver.yaml b/src/noether/scaffold/template_files/configs/experiment/emmi_wing/transolver.yaml new file mode 100644 index 00000000..68efd39f --- /dev/null +++ b/src/noether/scaffold/template_files/configs/experiment/emmi_wing/transolver.yaml @@ -0,0 +1,16 @@ +# @package _global_ +defaults: + - override /model: transolver + - override /tracker: __TRACKER__ + +name: __PROJECT__-transolver + +inference_pipeline: + num_surface_points: 1000000000 + num_volume_points: 1000000000 + +sample_size_property: surface_position +chunk_size: ${pipeline.num_surface_points} +chunk_properties: + - surface_position + - volume_position diff --git a/src/noether/scaffold/template_files/configs/experiment/emmi_wing/upt.yaml b/src/noether/scaffold/template_files/configs/experiment/emmi_wing/upt.yaml new file mode 100644 index 00000000..457ad259 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/experiment/emmi_wing/upt.yaml @@ -0,0 +1,28 @@ +# @package _global_ +defaults: + - override /model: upt + - override /tracker: __TRACKER__ + +name: __PROJECT__-upt + +model: + supernode_pooling_config: + radius: 0.25 + +pipeline: + num_supernodes: 16384 + num_surface_queries: 16384 + num_volume_queries: 16384 + num_surface_points: 65536 + +inference_pipeline: + num_supernodes: 16384 + num_surface_queries: 1000000000 + num_volume_queries: 1000000000 + num_surface_points: 65536 + +sample_size_property: surface_query_position +chunk_size: ${pipeline.num_surface_queries} +chunk_properties: + - surface_query_position + - volume_query_position diff --git a/src/noether/scaffold/template_files/configs/experiment/shapenet/ab_upt.yaml 
b/src/noether/scaffold/template_files/configs/experiment/shapenet/ab_upt.yaml new file mode 100644 index 00000000..d4465175 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/experiment/shapenet/ab_upt.yaml @@ -0,0 +1,16 @@ +# @package _global_ +defaults: + - override /model: ab_upt + - override /tracker: __TRACKER__ + +name: __PROJECT__-ab-upt + +model: + supernode_pooling_config: + radius: 9 + +pipeline: + num_geometry_points: 3586 + num_geometry_supernodes: 3586 + num_surface_anchor_points: 3586 + num_volume_anchor_points: 4096 diff --git a/src/noether/scaffold/template_files/configs/experiment/shapenet/transformer.yaml b/src/noether/scaffold/template_files/configs/experiment/shapenet/transformer.yaml new file mode 100644 index 00000000..347a3013 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/experiment/shapenet/transformer.yaml @@ -0,0 +1,6 @@ +# @package _global_ +defaults: + - override /model: transformer + - override /tracker: __TRACKER__ + +name: __PROJECT__-transformer diff --git a/src/noether/scaffold/template_files/configs/experiment/shapenet/transolver.yaml b/src/noether/scaffold/template_files/configs/experiment/shapenet/transolver.yaml new file mode 100644 index 00000000..c9f778bb --- /dev/null +++ b/src/noether/scaffold/template_files/configs/experiment/shapenet/transolver.yaml @@ -0,0 +1,6 @@ +# @package _global_ +defaults: + - override /model: transolver + - override /tracker: __TRACKER__ + +name: __PROJECT__-transolver diff --git a/src/noether/scaffold/template_files/configs/experiment/shapenet/upt.yaml b/src/noether/scaffold/template_files/configs/experiment/shapenet/upt.yaml new file mode 100644 index 00000000..678d6c0d --- /dev/null +++ b/src/noether/scaffold/template_files/configs/experiment/shapenet/upt.yaml @@ -0,0 +1,16 @@ +# @package _global_ +defaults: + - override /model: upt + - override /tracker: __TRACKER__ + +name: __PROJECT__-upt + +model: + supernode_pooling_config: + radius: 9 + +pipeline: + 
num_supernodes: 3586 + num_surface_queries: 3586 + num_volume_queries: 4096 + sample_query_points: true diff --git a/src/noether/scaffold/template_files/configs/model/ab_upt.yaml b/src/noether/scaffold/template_files/configs/model/ab_upt.yaml new file mode 100644 index 00000000..9c5f8131 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/model/ab_upt.yaml @@ -0,0 +1,33 @@ +kind: __PROJECT__.model.ABUPT +name: ab_upt +geometry_depth: 1 +hidden_dim: 192 +use_bias_layers: false +transformer_block_config: + num_heads: 3 + mlp_expansion_factor: 4 + use_rope: true +supernode_pooling_config: + input_dim: ${data_specs.position_dim} + radius: 1 +physics_blocks: + - perceiver + - shared + - cross + - shared + - cross + - shared + - cross + - shared + - cross + - shared +num_surface_blocks: 2 +num_volume_blocks: 2 +optimizer_config: ${optimizer} +forward_properties: + - geometry_position + - geometry_supernode_idx + - geometry_batch_idx + - surface_anchor_position + - volume_anchor_position +data_specs: ${data_specs} diff --git a/src/noether/scaffold/template_files/configs/model/transformer.yaml b/src/noether/scaffold/template_files/configs/model/transformer.yaml new file mode 100644 index 00000000..2d953eab --- /dev/null +++ b/src/noether/scaffold/template_files/configs/model/transformer.yaml @@ -0,0 +1,15 @@ +kind: __PROJECT__.model.Transformer +name: transformer +hidden_dim: 192 +depth: 12 +num_heads: 3 +mlp_expansion_factor: 4 +optimizer_config: ${optimizer} +use_output_projection: true +use_rope: true +data_specs: ${data_specs} +forward_properties: + - surface_position + - volume_position + - surface_features + - volume_features diff --git a/src/noether/scaffold/template_files/configs/model/transolver.yaml b/src/noether/scaffold/template_files/configs/model/transolver.yaml new file mode 100644 index 00000000..5d48f1f3 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/model/transolver.yaml @@ -0,0 +1,16 @@ +kind: __PROJECT__.model.Transolver 
+name: transolver +hidden_dim: 192 +depth: 12 +num_heads: 3 +attention_arguments: + num_slices: 512 +mlp_expansion_factor: 4 +use_output_projection: true +optimizer_config: ${optimizer} +data_specs: ${data_specs} +forward_properties: + - surface_position + - volume_position + - surface_features + - volume_features diff --git a/src/noether/scaffold/template_files/configs/model/upt.yaml b/src/noether/scaffold/template_files/configs/model/upt.yaml new file mode 100644 index 00000000..b513ec5c --- /dev/null +++ b/src/noether/scaffold/template_files/configs/model/upt.yaml @@ -0,0 +1,30 @@ +kind: __PROJECT__.model.UPT +name: upt +hidden_dim: 192 +approximator_depth: 12 +num_heads: 3 +mlp_expansion_factor: 4 +use_rope: true +use_bias_layers: true +data_specs: ${data_specs} +supernode_pooling_config: + input_dim: ${data_specs.position_dim} + radius: 9 +approximator_config: + use_rope: ${model.use_rope} +decoder_config: + depth: 12 + input_dim: ${data_specs.position_dim} + perceiver_block_config: + use_rope: ${model.use_rope} +optimizer_config: ${optimizer} +forward_properties: + - surface_mask_query + - surface_position_batch_idx + - surface_position_supernode_idx + - surface_position + - surface_features + - surface_query_position + - surface_query_features + - volume_query_position + - volume_query_features diff --git a/src/noether/scaffold/template_files/configs/optimizer/adamw.yaml b/src/noether/scaffold/template_files/configs/optimizer/adamw.yaml new file mode 100644 index 00000000..63dfc913 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/optimizer/adamw.yaml @@ -0,0 +1,9 @@ +kind: torch.optim.AdamW +lr: 1.0e-3 +weight_decay: 0.05 +clip_grad_norm: 1.0 +schedule_config: + kind: noether.core.schedules.LinearWarmupCosineDecaySchedule + warmup_percent: 0.05 + end_value: 1.0e-6 + max_value: ${model.optimizer_config.lr} \ No newline at end of file diff --git a/src/noether/scaffold/template_files/configs/optimizer/lion.yaml 
b/src/noether/scaffold/template_files/configs/optimizer/lion.yaml new file mode 100644 index 00000000..655d0ad0 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/optimizer/lion.yaml @@ -0,0 +1,9 @@ +kind: noether.core.optimizer.Lion +lr: 5.0e-5 +weight_decay: 0.05 +clip_grad_norm: 0.25 +schedule_config: + kind: noether.core.schedules.LinearWarmupCosineDecaySchedule + warmup_percent: 0.05 + end_value: 1.0e-6 + max_value: ${model.optimizer_config.lr} # Set max_value to match the learning rate defined above \ No newline at end of file diff --git a/src/noether/scaffold/template_files/configs/pipeline/caeml_pipeline.yaml b/src/noether/scaffold/template_files/configs/pipeline/caeml_pipeline.yaml new file mode 100644 index 00000000..88fe5c5d --- /dev/null +++ b/src/noether/scaffold/template_files/configs/pipeline/caeml_pipeline.yaml @@ -0,0 +1,8 @@ +kind: __PROJECT__.pipeline.AeroMultistagePipeline +num_surface_points: 16384 +num_volume_points: ${pipeline.num_surface_points} +num_surface_queries: 0 +num_volume_queries: 0 +use_physics_features: ${trainer.use_physics_features} +dataset_statistics: ${dataset_statistics} +data_specs: ${data_specs} diff --git a/src/noether/scaffold/template_files/configs/pipeline/shapenet_pipeline.yaml b/src/noether/scaffold/template_files/configs/pipeline/shapenet_pipeline.yaml new file mode 100644 index 00000000..d546ae00 --- /dev/null +++ b/src/noether/scaffold/template_files/configs/pipeline/shapenet_pipeline.yaml @@ -0,0 +1,8 @@ +kind: __PROJECT__.pipeline.AeroMultistagePipeline +num_surface_points: 3586 +num_volume_points: 4096 +num_surface_queries: 0 +num_volume_queries: 0 +use_physics_features: ${trainer.use_physics_features} +dataset_statistics: ${dataset_statistics} +data_specs: ${data_specs} diff --git a/src/noether/scaffold/template_files/configs/tracker/disabled.yaml b/src/noether/scaffold/template_files/configs/tracker/disabled.yaml new file mode 100644 index 00000000..490d47dc --- /dev/null +++ 
# @package _global_
# Root training config for the AhmedML dataset.
# `model` and `tracker` are mandatory choices ("???"); they are normally supplied by an
# experiment config via `override /model: ...` and `override /tracker: ...`.

defaults:
  - data_specs: caeml
  - dataset_normalizers: caeml_dataset_normalizers
  - dataset_statistics: ahmedml_stats
  - model: "???"
  - trainer: caeml_trainer
  - datasets: caeml_dataset
  # "???" (three question marks) is Hydra's mandatory-value marker; the previous "??" was
  # treated as a literal config name and produced a misleading "config not found" error.
  - tracker: "???"
  - callbacks: training_callbacks_caeml
  - pipeline: caeml_pipeline
  - /pipeline@inference_pipeline: caeml_pipeline
  - optimizer: __OPTIMIZER__
  - _self_

dataset_root: __DATASET_PATH__
dataset_kind: noether.data.datasets.cfd.AhmedMLDataset
config_schema_kind: __PROJECT__.schemas.config_schema.ConfigSchema
excluded_properties:
  - volume_sdf
  - volume_normals
  - surface_normals
stage_name: train
store_code_in_output: false
output_path: ./outputs
# @package _global_
# Root training config for the ShapeNet-Car dataset.
# `model` and `tracker` are mandatory choices ("???"); they are normally supplied by an
# experiment config via `override /model: ...` and `override /tracker: ...`.

defaults:
  - data_specs: shapenet_car
  - dataset_normalizers: shapenet_dataset_normalizers
  - dataset_statistics: shapenet_car_stats
  - model: "???"
  - trainer: shapenet_trainer
  - datasets: shapenet_dataset
  # "???" (three question marks) is Hydra's mandatory-value marker; the previous "??" was
  # treated as a literal config name and produced a misleading "config not found" error.
  - tracker: "???"
  - callbacks: training_callbacks_shapenet
  - pipeline: shapenet_pipeline
  - optimizer: __OPTIMIZER__
  - _self_

dataset_root: __DATASET_PATH__
dataset_kind: noether.data.datasets.cfd.ShapeNetCarDataset
config_schema_kind: __PROJECT__.schemas.config_schema.ConfigSchema
excluded_properties:
  - surface_friction
  - volume_pressure
  - volume_vorticity
stage_name: train
store_code_in_output: false
output_path: ./outputs
+precision: bfloat16 +max_epochs: 5 +effective_batch_size: 1 +log_every_n_epochs: 1 +callbacks: ${callbacks} +forward_properties: ${model.forward_properties} +target_properties: + - surface_pressure_target + - volume_velocity_target +# AutomotiveAerodynamicsCFDTrainerConfig +surface_weight: 1.0 +volume_weight: 1.0 +surface_pressure_weight: 1.0 +volume_velocity_weight: 1.0 +use_physics_features: false # whether to use the physics features (e.g., surface normals, volume normals, SDF) as input to the model diff --git a/src/noether/scaffold/template_files/model/ab_upt.py b/src/noether/scaffold/template_files/model/ab_upt.py new file mode 100644 index 00000000..6e7d2bdb --- /dev/null +++ b/src/noether/scaffold/template_files/model/ab_upt.py @@ -0,0 +1,77 @@ +# Copyright © 2025 Emmi AI GmbH. All rights reserved. +import torch +from __PROJECT__.schemas.models import ABUPTConfig + +from noether.modeling.models import AnchoredBranchedUPT as ABUPTBackbone + +from .base import BaseModel + + +class ABUPT(BaseModel): + """Implementation of the AB-UPT model.""" + + def __init__( + self, + model_config: ABUPTConfig, + **kwargs, + ): + """Initialize the AB-UPT model. + + Args: + model_config: The configuration for the AB-UPT model. + """ + + super().__init__(model_config=model_config, **kwargs) + + self.ab_upt = ABUPTBackbone( + config=model_config, + ) + + def forward( + # geometry + self, + geometry_position: torch.Tensor, + geometry_supernode_idx: torch.Tensor, + geometry_batch_idx: torch.Tensor | None, + # anchors + surface_anchor_position: torch.Tensor, + volume_anchor_position: torch.Tensor, + # design parameters + geometry_design_parameters: torch.Tensor | None = None, + inflow_design_parameters: torch.Tensor | None = None, + # queries + query_surface_position: torch.Tensor | None = None, + query_volume_position: torch.Tensor | None = None, + ) -> dict[str, torch.Tensor]: + """Forward pass of the AB-UPT model. + + Args: + geometry_position: Positions of the geometry points. 
class BaseModel(Model):
    """Shared base class for the models used in this __PROJECT__.

    Depending on the model config it builds a positional embedding, optional per-domain
    (surface / volume) bias MLPs, optional projections of physics features, and an optional
    output head (LayerNorm + linear projection). It also provides helpers to apply the
    surface/volume bias and to split a concatenated output tensor into named fields.
    """

    def __init__(
        self,
        model_config: BaseModelConfig,
        **kwargs,
    ):
        """Build the optional shared sub-modules requested by ``model_config``.

        Args:
            model_config: Configuration of the model.
            **kwargs: Forwarded unchanged to the parent ``Model`` constructor.

        Raises:
            ValueError: If ``position_projection`` is neither ``"sincos"`` nor ``"linear"``, or if
                physics features are enabled while both feature dimensions are zero.
        """

        super().__init__(model_config=model_config, **kwargs)

        self.input_dim = model_config.data_specs.position_dim
        self.output_dim = model_config.data_specs.total_output_dim
        self.use_physics_features = model_config.data_specs.use_physics_features
        self.position_projection = model_config.position_projection
        self.name = model_config.name
        # Always define the flag: output_projection() reads it, and leaving it unset when the
        # config disables the head turned the intended ValueError into an AttributeError.
        self.use_output_projection = bool(model_config.use_output_projection)

        # NOTE(review): the nesting of the optional sub-modules below relative to this
        # hidden_dim check was reconstructed from a whitespace-collapsed source — confirm.
        if model_config.hidden_dim:
            # NOTE(review): the positional input dim is hard-coded to 3 although self.input_dim
            # is read from data_specs.position_dim — confirm the two always agree.
            if self.position_projection == "sincos":
                self.pos_embed = ContinuousSincosEmbed(
                    config=ContinuousSincosEmbeddingConfig(hidden_dim=model_config.hidden_dim, input_dim=3)
                )
            elif self.position_projection == "linear":
                self.pos_embed = LinearProjection(
                    config=LinearProjectionConfig(
                        input_dim=3, output_dim=model_config.hidden_dim, init_weights="truncnormal002"
                    )
                )
            else:
                raise ValueError(
                    f"Unknown position projection: {self.position_projection}. Only 'sincos' and 'linear' are supported."
                )

        if model_config.use_bias_layers:
            # One MLP per domain so shared blocks can tell surface tokens from volume tokens.
            self.surface_bias = MLP(
                config=MLPConfig(
                    input_dim=model_config.hidden_dim,
                    hidden_dim=model_config.hidden_dim,
                    output_dim=model_config.hidden_dim,
                )
            )

            self.volume_bias = MLP(
                config=MLPConfig(
                    input_dim=model_config.hidden_dim,
                    hidden_dim=model_config.hidden_dim,
                    output_dim=model_config.hidden_dim,
                )
            )

        if self.use_physics_features:
            # Either projection stays None when the corresponding feature dimension is zero.
            self.project_volume_features = None
            self.project_surface_features = None
            if model_config.data_specs.volume_feature_dim_total > 0:
                self.project_volume_features = LinearProjection(
                    config=LinearProjectionConfig(
                        input_dim=model_config.data_specs.volume_feature_dim_total,
                        output_dim=model_config.hidden_dim,
                        init_weights="truncnormal002",
                    )
                )
            if model_config.data_specs.surface_feature_dim_total > 0:
                self.project_surface_features = LinearProjection(
                    config=LinearProjectionConfig(
                        input_dim=model_config.data_specs.surface_feature_dim_total,
                        output_dim=model_config.hidden_dim,
                        init_weights="truncnormal002",
                    )
                )
            # Explicit None checks: do not rely on the truthiness of module objects.
            if self.project_volume_features is None and self.project_surface_features is None:
                raise ValueError("use_physics_features is True, but both surface and volume feature dims are zero.")

        if self.use_output_projection:
            # Output head: LayerNorm followed by a linear map into the output space.
            self.norm = nn.LayerNorm(model_config.hidden_dim, eps=1e-6)
            self.out = LinearProjection(
                config=LinearProjectionConfig(
                    input_dim=model_config.hidden_dim, output_dim=self.output_dim, init_weights="truncnormal002"
                )
            )

    def output_projection(self, x: torch.Tensor) -> torch.Tensor:
        """Project the last latent representation into the output physics space.

        Most model implementations end with such a projection, so a unified head is provided here.

        Args:
            x: tensor of shape (batch_size, num_points, dim) containing the features for each point.

        Returns:
            tensor of shape (batch_size, num_points, output_dim) containing the projected features
            in (normalized) physics space.

        Raises:
            ValueError: If the model config disabled the output projection.
        """
        if not self.use_output_projection:
            raise ValueError("output_projection called, but use_output_projection is set to False in the model config.")
        return self.out(self.norm(x))

    def surface_and_volume_bias(self, x: torch.Tensor, surface_mask: torch.Tensor) -> torch.Tensor:
        """Apply the surface bias MLP to surface points and the volume bias MLP to volume points.

        For some models the surface and volume points are concatenated into a single input tensor
        (e.g., Transolver, Transformer). When weights are shared across both domains, the model
        needs to be told which points are surface and which are volume points; this is done by
        passing each group through its own MLP. This helper works on tensors where surface and
        volume points are concatenated along the point dimension. Models with a different layout
        can call ``self.surface_bias`` and ``self.volume_bias`` directly.

        The mask of the first sample is used for the whole batch. Each point keeps its original
        position in the returned tensor.

        Args:
            x: tensor of shape (batch_size, num_points, dim) or (num_points, dim).
            surface_mask: tensor of shape (batch_size, num_points) or (num_points,); non-zero / True
                marks surface points.

        Returns:
            torch.Tensor: biased tensor with the same leading shape as ``x``.
        """
        unbatch = x.ndim == 2
        if unbatch:
            # A single unbatched sample: add a batch dimension for the indexing below.
            x = x.unsqueeze(0)
        if surface_mask.ndim == 1:
            # Previously surface_mask[0] on a 1-D mask produced a scalar instead of a per-point mask.
            surface_mask = surface_mask.unsqueeze(0)

        is_surface = surface_mask[0].bool()  # the mask is assumed identical for every sample in the batch
        is_volume = ~is_surface
        x_surface = self.surface_bias(x[:, is_surface, :])
        x_volume = self.volume_bias(x[:, is_volume, :])

        # Scatter back instead of concatenating so the point order is preserved; for a
        # surface-first mask this equals torch.concat([x_surface, x_volume], dim=1).
        biased = torch.empty(
            (x.shape[0], x.shape[1], x_surface.shape[-1]), dtype=x_surface.dtype, device=x_surface.device
        )
        biased[:, is_surface, :] = x_surface
        biased[:, is_volume, :] = x_volume

        if unbatch:
            biased = biased.squeeze(0)
        return biased

    def gather_outputs(self, x: torch.Tensor, surface_mask: torch.Tensor) -> dict[str, torch.Tensor]:
        """Split the model output tensor into named surface and volume fields.

        Channel layout along the last dimension:
            - 0: surface pressure
            - 1:4: volume velocity
            - 4:7: surface friction (only when output_dim is 11)
            - 7:8: volume pressure (only when output_dim is 11)
            - 8:11: volume vorticity (only when output_dim is 11)

        Args:
            x: output tensor from the model, shape (batch_size, num_points, output_dim).
            surface_mask: indicator tensor for surface points, shape (batch_size, num_points).
                Surface points are True / non-zero, volume points are False / zero. The mask of the
                first sample is used for the whole batch.

        Returns:
            dict[str, torch.Tensor]: A dictionary containing the gathered outputs:
                - "surface_pressure": Tensor of shape (batch_size, num_surface_points, 1)
                - "volume_velocity": Tensor of shape (batch_size, num_volume_points, 3)
                - "surface_friction": Tensor of shape (batch_size, num_surface_points, 3) if output_dim is 11
                - "volume_pressure": Tensor of shape (batch_size, num_volume_points, 1) if output_dim is 11
                - "volume_vorticity": Tensor of shape (batch_size, num_volume_points, 3) if output_dim is 11

        Raises:
            ValueError: If output_dim is larger than 4 but not 11.
        """
        is_surface = surface_mask[0].bool()  # we assume the surface mask is the same for all samples in the batch
        is_volume = ~is_surface

        outputs = {
            "surface_pressure": x[:, is_surface, :1],
            # Original note: when we only have one volume point, don't compute loss
            # (default of zero is not possible).
            "volume_velocity": x[:, is_volume, 1:4],
        }

        if self.output_dim > 4:
            # Explicit error instead of `assert`, which is stripped when Python runs with -O.
            if self.output_dim != 11:
                raise ValueError(f"Unsupported output_dim {self.output_dim}: expected 4 or 11 output channels.")
            outputs["surface_friction"] = x[:, is_surface, 4:7]
            outputs["volume_pressure"] = x[:, is_volume, 7:8]
            outputs["volume_vorticity"] = x[:, is_volume, 8:11]

        return outputs

    def _init_weights(self, module: nn.Module) -> None:
        """Initialize the weights of a single module.

        Linear and Embedding weights are drawn from a normal distribution, Linear biases are
        zeroed, and LayerNorm is reset to weight 1 / bias 0.

        Args:
            module: nn.Module to initialize weights for.
        """
        if isinstance(module, (torch.nn.Linear, torch.nn.Embedding)):
            # NOTE(review): std is 0.0002 while the projections above request "truncnormal002" —
            # confirm this value is intended.
            module.weight.data.normal_(mean=0.0, std=0.0002)
            if isinstance(module, torch.nn.Linear) and module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, torch.nn.LayerNorm):
            # LayerNorm may be built without affine parameters, in which case these are None.
            if module.weight is not None:
                module.weight.data.fill_(1.0)
            if module.bias is not None:
                module.bias.data.zero_()
+ """ + super().__init__(model_config=model_config, **kwargs) + + self.encoder = ContinuousSincosEmbed( + config=ContinuousSincosEmbeddingConfig(hidden_dim=model_config.hidden_dim, input_dim=3) + ) + + self.use_rope = model_config.use_rope + self.rope = ( + RopeFrequency( + config=RopeFrequencyConfig(hidden_dim=model_config.hidden_dim // model_config.num_heads, input_dim=3) + ) + if self.use_rope + else None + ) + + self.transfomer_backbone = TransformerBackbone(config=model_config) + + def forward( + self, + surface_position: torch.Tensor, + volume_position: torch.Tensor, + surface_features: torch.Tensor | None = None, + volume_features: torch.Tensor | None = None, + ) -> dict[str, torch.Tensor]: + """Forward pass of the Transformer model. + + Args: + surface_position: input coordinates of the surface points. + volume_position: input coordinates of the volume points. + surface_mask_input: surface mask for the input points, indicating which points are surface points. + surface_features: optional input features of the surface points. + volume_features: optional input features of the volume points. + + Returns: + dict[str, torch.Tensor]: dictionary with the output tensors, containing the surface pressure and volume velocity. + """ + surface_mask_input = torch.zeros( + surface_position.shape[0], surface_position.shape[1] + volume_position.shape[1] + ) + surface_mask_input[:, : surface_position.shape[1]] = 1.0 + input_position = torch.concat([surface_position, volume_position], dim=1) + attn_kwargs = {} + + if self.use_rope: + rope = self.rope(input_position) + attn_kwargs["freqs"] = rope + + x = self.encoder(input_position) + + if self.use_physics_features: + surface_features = self.project_surface_features(surface_features) + volume_features = self.project_volume_features(volume_features) + physics_features = torch.concat([surface_features, volume_features], dim=1) + x = x + physics_features + # this step is redudant if the SDF is part of the physics_features. 
Without SDF, this step is needed. + x = self.surface_and_volume_bias(x=x, surface_mask=surface_mask_input) + + x = self.transfomer_backbone(x=x, attn_kwargs=attn_kwargs) + + x = self.output_projection(x) + + return self.gather_outputs(x=x, surface_mask=surface_mask_input) diff --git a/src/noether/scaffold/template_files/model/transolver.py b/src/noether/scaffold/template_files/model/transolver.py new file mode 100644 index 00000000..b4e7b3cd --- /dev/null +++ b/src/noether/scaffold/template_files/model/transolver.py @@ -0,0 +1,81 @@ +# Copyright © 2025 Emmi AI GmbH. All rights reserved. + +import torch +from __PROJECT__.schemas.models.transolver_config import TransolverConfig +from torch import nn + +from noether.modeling.models import Transformer as TransformerBackbone + +from .base import BaseModel + + +class Transolver(BaseModel): + """Implementation of the Transolver model. + Reference code: https://github.com/thuml/Transolver/ + Paper: https://arxiv.org/abs/2402.02366 + + Args: + BaseModel: Base model class that contains the utilities for all models we use in this __PROJECT__. + """ + + def __init__( + self, + model_config: TransolverConfig, + **kwargs, + ): + """ + + Args: + model_config: Configuration of the Transolver model. 
+ attn_ctor: Attention constructor + """ + + super().__init__(model_config=model_config, **kwargs) + + # original implementation uses a weird dimension-wise scaling after embed (also not excluded from wd) + # https://github.com/thuml/Transolver/blob/main/Car-Design-ShapeNetCar/models/Transolver.py#L163 + self.placeholder = nn.Parameter(torch.rand(1, 1, model_config.hidden_dim) / model_config.hidden_dim) + + self.transolver_backbone = TransformerBackbone( + config=model_config + ) # Transolver is a Transformer with a different attention mechanism + + def forward( + self, + surface_position: torch.Tensor, + volume_position: torch.Tensor, + surface_features: torch.Tensor | None = None, + volume_features: torch.Tensor | None = None, + ) -> dict[str, torch.Tensor]: + """forward pass of the Transolver model. + + Args: + surface_position: input coordinates of the surface points. + volume_position: input coordinates of the volume points. + surface_mask_input: mask for the input points, indicating which points are surface points. + physics_features: physics features for the input points. Defaults to None. + Returns: + dict[str, torch.Tensor]: dictionary with the output tensors, containing the surface pressure and volume velocity. 
+ """ + surface_mask_input = torch.zeros( + surface_position.shape[0], surface_position.shape[1] + volume_position.shape[1] + ) + surface_mask_input[:, : surface_position.shape[1]] = 1.0 + input_position = torch.concat([surface_position, volume_position], dim=1) + x = self.pos_embed(input_position) + + if self.use_physics_features: + surface_features = self.project_surface_features(surface_features) + volume_features = self.project_volume_features(volume_features) + physics_features = torch.concat([surface_features, volume_features], dim=1) + x = x + physics_features + + x = self.surface_and_volume_bias(x=x, surface_mask=surface_mask_input) + + x = x + self.placeholder + + x = self.transolver_backbone(x=x, attn_kwargs={}) + + x = self.output_projection(x) + + return self.gather_outputs(x=x, surface_mask=surface_mask_input) diff --git a/src/noether/scaffold/template_files/model/upt.py b/src/noether/scaffold/template_files/model/upt.py new file mode 100644 index 00000000..d922eb16 --- /dev/null +++ b/src/noether/scaffold/template_files/model/upt.py @@ -0,0 +1,113 @@ +# Copyright © 2025 Emmi AI GmbH. All rights reserved. + +import torch +from __PROJECT__.schemas.models import UPTConfig + +from noether.modeling.models import UPT as UPTBackbone + +from .base import BaseModel + + +class UPT(BaseModel): + """Implementation of the UPT (Universal Physics Transformer) model. + + Args: + BaseModel: Base model class that contains the utilities for all models we use in this __PROJECT__. + """ + + def __init__( + self, + model_config: UPTConfig, + **kwargs, + ): + """ + Args: + model_config: Configuration of the tutorial UPT model, extending the core UPTConfig with tutorial-specific parameters. 
+ """ + super().__init__(model_config=model_config, **kwargs) + + self.upt_backbone = UPTBackbone( + config=model_config, + ) + + self.use_bias_layers = model_config.use_bias_layers + self.use_physics_features = model_config.data_specs.use_physics_features + + def forward( + self, + surface_position_batch_idx: torch.Tensor, + surface_position_supernode_idx: torch.Tensor, + surface_position: torch.Tensor, + surface_query_position: torch.Tensor, + volume_query_position: torch.Tensor, + surface_features: torch.Tensor | None = None, + surface_query_features: torch.Tensor | None = None, + volume_query_features: torch.Tensor | None = None, + ) -> dict[str, torch.Tensor]: + """Forward pass of the UPT model. + + Args: + surface_position_batch_idx: Batch indices for the surface positions, since the surface positions are a sparse tensor for the supernode pooling. + surface_position_supernode_idx: Supernode indices for the surface positions. + surface_position: Input coordinates of the surface points. + surface_query_position: Surface query positions. + volume_query_position: Volume query positions. + surface_features: Surface features for the input points. Defaults to None. + surface_query_features: Surface features for the query points. Defaults to None. + volume_query_features: Volume features for the query points. Defaults to None. + + Returns: + dict[str, torch.Tensor]: Dictionary with the output tensors, containing the surface pressure and volume velocity. 
+ """ + + # add features to queries + if surface_features is None: + surface_input_features = None + else: + surface_input_features = surface_features.squeeze( + 0 + ) # remove batch dimension, since we only have one sample + query_position = torch.cat([surface_query_position, volume_query_position], dim=1) + surface_mask_query = torch.zeros(surface_query_position.shape[0], query_position.shape[1]) + surface_mask_query[:, : surface_query_position.shape[1]] = 1.0 + + encoder_attn_kwargs, decoder_attn_kwargs = self.upt_backbone.compute_rope_args( + surface_position_batch_idx, surface_position, surface_position_supernode_idx, query_position + ) + + # supernode pooling encoder + x = self.upt_backbone.encoder( + input_pos=surface_position, + supernode_idx=surface_position_supernode_idx, + batch_idx=surface_position_batch_idx, + input_features=surface_input_features, + ) + # approximator blocks + for block in self.upt_backbone.approximator_blocks: + x = block(x, attn_kwargs=encoder_attn_kwargs) + + queries = self.upt_backbone.pos_embed(query_position) + + if self.use_bias_layers: + queries = self.surface_and_volume_bias(queries, surface_mask_query) + if self.use_physics_features: + surface_query_features = self.project_surface_features(surface_query_features) + volume_query_features = self.project_volume_features(volume_query_features) + physics_query_features = torch.cat([surface_query_features, volume_query_features], dim=1) + queries = queries + physics_query_features + + # perceiver decoder + x = self.upt_backbone.decoder( + kv=x, + queries=queries, + attn_kwargs=decoder_attn_kwargs, + condition=None, + ) + + x = self.upt_backbone.norm(x) + x = self.upt_backbone.prediction_layer(x) + + return self.gather_outputs( + x=x, + surface_mask=surface_mask_query, + ) diff --git a/src/noether/scaffold/template_files/pipeline/__init__.py b/src/noether/scaffold/template_files/pipeline/__init__.py new file mode 100644 index 00000000..e63ecf4f --- /dev/null +++ 
class SparseTensorOffsetCollator(Collator):
    """Collates sparse tensors by concatenating them along the first axis and creating an offset tensor that maps
    each sample to its respective index in the batch.
    """

    def __init__(self, item: str, offset_key: str):
        """
        Args:
            item: Key of the tensor that is shifted by the running offset and concatenated.
            offset_key: Key of the entry whose length determines by how much the offset grows after each sample.
        """
        self.item = item
        self.offset_key = offset_key

    def __call__(self, samples: list[dict[str, torch.Tensor]]) -> dict[str, torch.Tensor]:
        """Concatenates sparse tensors along the first axis and creates an offset tensor.

        The input samples are not modified: the shifted tensors are new tensors collected in a local list, so no
        copy of the (potentially large) samples is needed.

        Args:
            samples: List of individual samples retrieved from the dataset.

        Returns:
            Dictionary with a single entry ``self.item`` holding the concatenation of the shifted tensors.
        """
        offset = 0
        shifted: list[torch.Tensor] = []
        for sample in samples:
            cur_num_points = len(sample[self.offset_key])
            # `tensor + offset` allocates a new tensor, the tensor stored in the sample stays untouched
            shifted.append(sample[self.item] + offset)
            offset += cur_num_points

        return {self.item: torch.concat(shifted)}
+ +from __PROJECT__.pipeline.sample_processors import ( + AnchorPointSamplingSampleProcessor, +) +from __PROJECT__.schemas.pipelines.aero_pipeline_config import AeroCFDPipelineConfig + +from noether.data.pipeline import MultiStagePipeline, SampleProcessor +from noether.data.pipeline.collators import ( + ConcatSparseTensorCollator, + DefaultCollator, + SparseTensorOffsetCollator, +) +from noether.data.pipeline.sample_processors import ( + ConcatTensorSampleProcessor, + DefaultTensorSampleProcessor, + DuplicateKeysSampleProcessor, + MomentNormalizationSampleProcessor, + PointSamplingSampleProcessor, + RenameKeysSampleProcessor, + SupernodeSamplingSampleProcessor, +) + + +def _split_by_underscore(item: str) -> list[str]: + return item.split("_") + + +def _split_three_or_none(item: str) -> list[str | None]: + parts = item.split("_") + return parts if len(parts) == 3 else [None] * 3 + + +class DataKeys: + """A central repository for data dictionary keys.""" + + # Base Positions + SURFACE_POS = "surface_position" + VOLUME_POS = "volume_position" + GEOMETRY_POS = "geometry_position" + SURFACE_FEATURES = "surface_features" + VOLUME_FEATURES = "volume_features" + SURFACE_QUERY_FEATURES = "surface_query_features" + VOLUME_QUERY_FEATURES = "volume_query_features" + + # Geometry + GEOMETRY_BATCH_IDX = "geometry_batch_idx" + GEOMETRY_SUPERNODE_IDX = "geometry_supernode_idx" + + @staticmethod + def as_query(key: str) -> str: + """Converts a standard key to its 'query' equivalent.""" + parts = key.split("_") + assert len(parts) == 2, "Key must be in the format '_'" + return f"{parts[0]}_query_{parts[1]}" + + @staticmethod + def as_target(key: str) -> str: + """Converts a standard key to its 'target' equivalent.""" + return f"{key}_target" + + @staticmethod + def as_anchor(key: str) -> str: + """Converts a standard key to its 'anchor' equivalent.""" + parts = key.split("_") + assert len(parts) == 2, "Key must be in the format '_'" + return f"{parts[0]}_anchor_{parts[1]}" + + 
+class AeroMultistagePipeline(MultiStagePipeline): + """ + A pipeline for the the CFD AeroDynamics dataset AhmedML, DrivAerML, and ShapeNet-Car that handles multi-stage data processing. + """ + + @property + def has_query_points(self) -> bool: + """Check if any query points are specified.""" + return self.num_surface_queries + self.num_volume_queries > 0 + + @property + def use_anchor_points(self) -> bool: + """Check if anchor points are used instead of standard sampling.""" + return self.num_volume_anchor_points > 0 and self.num_surface_anchor_points > 0 + + def __init__( + self, + pipeline_config: AeroCFDPipelineConfig, + **kwargs, + ): + """ + + Args: + pipeline_config: Configuration for the ShapeNet multi-stage pipeline. + """ + + self.dataset_statistics = pipeline_config.dataset_statistics + self.seed = pipeline_config.seed + + # Number of points and queries for point sampling + self.num_surface_points = pipeline_config.num_surface_points + self.num_volume_points = pipeline_config.num_volume_points + self.num_surface_queries = pipeline_config.num_surface_queries + self.num_volume_queries = pipeline_config.num_volume_queries + self.sample_query_points = pipeline_config.sample_query_points + + # UPT specific parameters + self.num_supernodes = pipeline_config.num_supernodes + + # AB-UPT specific parameters + self.num_volume_anchor_points = pipeline_config.num_volume_anchor_points + self.num_surface_anchor_points = pipeline_config.num_surface_anchor_points + self.num_geometry_points = pipeline_config.num_geometry_points + self.num_geometry_supernodes = pipeline_config.num_geometry_supernodes + self.use_query_positions = False + + self.use_physics_features = ( + pipeline_config.use_physics_features + ) # Whether to use physics features (i.e., SDF, normals, etc.) as input to the model. 
def _define_items_keys(self) -> None:
    """
    Define which sample keys are sampled together and which keys are collated by default.

    When sampling input points and query points, we have to tie certain items together.
    For example, the volume points need to be sampled together with the matching targets and features.
    In this method, we define which dataset items are sampled together for the volume and surface points.
    Next to that, we also define the query items, which are the same as the sampling items, but with "query"
    inserted into the key (see ``DataKeys.as_query``).
    We also define the default collator items, which are the keys handed to the default collator.

    Sets ``volume_sampling_items``, ``surface_sampling_items``, ``volume_query_items``, ``surface_query_items``
    and ``default_collator_items`` on ``self``.
    """
    # NOTE: `|` binds tighter than the conditional expression, i.e. this reads
    # `({VOLUME_POS} | targets) if num_volume_points > 0 else set()`.
    self.volume_sampling_items = (
        {
            DataKeys.VOLUME_POS,
        }
        | self.volume_targets
        if self.num_volume_points > 0
        else set()
    )

    # features are only sampled along with the points when physics features are used
    self.volume_sampling_items |= self.volume_features if self.use_physics_features else set()
    # same precedence as above: `({SURFACE_POS} | targets) if num_surface_points > 0 else set()`
    self.surface_sampling_items = (
        {
            DataKeys.SURFACE_POS,
        }
        | self.surface_targets
        if self.num_surface_points > 0
        else set()
    )
    self.surface_sampling_items |= self.surface_features if self.use_physics_features else set()

    # `DataKeys.as_query` requires keys with exactly one underscore
    self.volume_query_items = {DataKeys.as_query(item) for item in self.volume_sampling_items}

    self.surface_query_items = {DataKeys.as_query(item) for item in self.surface_sampling_items}

    # By default we collate the input positions; the volume position is left out when volume anchor points are used.
    self.default_collator_items = (
        [
            DataKeys.VOLUME_POS,
        ]
        if self.num_volume_anchor_points == 0
        else []
    )

    # Unless both anchor point counts are non-zero, collate either the surface position (no supernodes) or the
    # surface/volume query positions (with supernodes).
    if self.num_volume_anchor_points == 0 or self.num_surface_anchor_points == 0:
        self.default_collator_items += (
            [DataKeys.SURFACE_POS]
            if self.num_supernodes == 0
            else ["surface_query_position", "volume_query_position"]
        )

    # next to that we also collate the physics features, which are the concatenation of the surface and volume features. The targets are also included.
    self.default_collator_items += [DataKeys.as_target(item) for item in self.surface_targets | self.volume_targets]
    self.default_collator_items += (
        [DataKeys.VOLUME_FEATURES, DataKeys.SURFACE_FEATURES] if self.use_physics_features else []
    )
    self.default_collator_items += (
        [DataKeys.SURFACE_QUERY_FEATURES, DataKeys.VOLUME_QUERY_FEATURES]
        if self.has_query_points and self.use_physics_features
        else []
    )
    # conditioning inputs are collated under their own keys
    self.default_collator_items += self.conditioning_dims.keys() if self.conditioning_dims else []
def _build_collator_pipeline(self) -> list:
    """
    Assemble the collators, i.e. the objects that define how individual samples are combined into a batch.

    The default collator for ``self.default_collator_items`` is always present. A sparse concatenation collator
    (which also creates batch indices) and an offset collator for the supernode indices are added for the surface
    positions when ``num_supernodes > 0``, and for the geometry positions when ``num_geometry_supernodes`` is truthy.
    """
    collators: list = [DefaultCollator(items=self.default_collator_items)]

    if self.num_supernodes > 0:
        # with supernodes, the surface positions become a sparse tensor with batch indices
        surface_concat = ConcatSparseTensorCollator(
            items=["surface_position"],
            create_batch_idx=True,
            batch_idx_key="surface_position_batch_idx",
        )
        surface_offset = SparseTensorOffsetCollator(
            item="surface_position_supernode_idx",
            offset_key="surface_position",
        )
        collators += [surface_concat, surface_offset]

    if self.num_geometry_supernodes:
        # with geometry supernodes, the geometry positions become a sparse tensor with batch indices
        geometry_concat = ConcatSparseTensorCollator(
            items=["geometry_position"],
            create_batch_idx=True,
            batch_idx_key="geometry_batch_idx",
        )
        geometry_offset = SparseTensorOffsetCollator(
            item="geometry_supernode_idx",
            offset_key="geometry_position",
        )
        collators += [geometry_concat, geometry_offset]

    return collators
def _get_surface_normalizers_sample_processors(self) -> list[SampleProcessor]:
    """
    Build the normalizers for surface quantities.

    Returns an empty list when ``use_physics_features`` is ``False``; otherwise a single moment normalizer for
    ``surface_sdf``.
    """
    if self.use_physics_features is False:
        return []

    # NOTE(review): the surface SDF is normalized with the *volume* SDF statistics — presumably so that surface
    # and volume SDF values share one scale; confirm this is intended.
    stats = self.dataset_statistics
    surface_sdf_normalizer = MomentNormalizationSampleProcessor(
        item="surface_sdf",
        mean=stats.volume_sdf_mean,
        std=stats.volume_sdf_std,
    )
    return [surface_sdf_normalizer]
def _get_query_sampling_sample_processor(self) -> list[SampleProcessor]:
    """
    Build the sample processors that create and subsample the query points.

    Returns an empty list unless query points are requested (``has_query_points``) and they are to be sampled
    separately (``sample_query_points``).
    """
    if not (self.has_query_points and self.sample_query_points):
        return []

    # duplicate every sampling item under its query key first, so the queries can be sampled independently
    query_key_map = {}
    for item in self.surface_sampling_items | self.volume_sampling_items:
        query_key_map[item] = DataKeys.as_query(item)

    duplicate_as_queries = DuplicateKeysSampleProcessor(key_map=query_key_map)
    sample_surface_queries = PointSamplingSampleProcessor(
        items=self.surface_query_items,
        num_points=self.num_surface_queries,
        seed=self.seed,
    )
    sample_volume_queries = PointSamplingSampleProcessor(
        items=self.volume_query_items,
        num_points=self.num_volume_queries,
        seed=self.seed,
    )
    return [duplicate_as_queries, sample_surface_queries, sample_volume_queries]
def _get_target_renaming_sample_processors(self) -> list[SampleProcessor]:
    """Build the processors that duplicate the target tensors under their ``*_target`` keys.

    With query points, the source of each target is its query key (``DataKeys.as_query``); without query points
    it is the plain target key. The volume targets are handled first, then the surface targets.
    """
    # pick the source-key mapping once: query key when query points exist, the key itself otherwise
    if self.has_query_points:
        source_key = DataKeys.as_query
    else:

        def source_key(target: str) -> str:
            return target

    processors = []
    for targets in (self.volume_targets, self.surface_targets):
        key_map = {source_key(target): DataKeys.as_target(target) for target in targets}
        processors.append(DuplicateKeysSampleProcessor(key_map=key_map))
    return processors
class AnchorPointSamplingSampleProcessor(SampleProcessor):
    """Randomly subsamples anchor points from a pointcloud and optionally keeps the remaining points as queries."""

    def __init__(
        self,
        items: set[str],
        num_points: int,
        to_prefix_and_postfix: Callable[[str], tuple[str, str]],
        to_prefix_midfix_postfix: Callable[[str], tuple[str, str, str]],
        keep_queries: bool = False,
        seed: int | None = None,
    ):
        """
        Args:
            items: Which pointcloud items should be subsampled (e.g., input_position, output_position, ...). If
                multiple items are present, the subsampling will use identical indices for all items (e.g., to
                downsample output_position and output_pressure with the same subsampling).
            num_points: Number of points to sample.
            to_prefix_and_postfix: Splits an item key into ``(prefix, postfix)``; the outputs are stored under
                ``{prefix}_anchor_{postfix}`` and ``{prefix}_query_{postfix}``.
            to_prefix_midfix_postfix: Splits an item key into three parts. It is stored on the instance but not
                used by this class.
            keep_queries: If True, the points that were not selected as anchors are stored as query points.
            seed: Random seed for deterministic sampling for evaluation. Default None (i.e., no seed). If not None,
                requires sample index to be present in batch.

        Raises:
            ValueError: If ``num_points`` is negative.
        """
        if not (num_points >= 0):
            raise ValueError("Number of points to sample must be non-negative.")

        self.seed = seed
        self.keep_queries = keep_queries
        self.num_points = num_points
        self.items = items
        self.to_prefix_and_postfix = to_prefix_and_postfix
        self.to_prefix_midfix_postfix = to_prefix_midfix_postfix

    def __call__(self, input_sample: dict[str, Any]) -> dict[str, Any]:
        """Subsamples the pointclouds identified by `self.items`, using the same permutation for every item.

        The sample is copied via `self.save_copy` before it is modified. The subsampled tensors are created by
        index access, which yields new tensors rather than views of the originals.

        Args:
            input_sample: Sample retrieved from the dataset.

        Returns:
            Preprocessed copy of `input_sample` with the additional `*_anchor_*` (and, if queries are kept and
            points are left over, `*_query_*`) entries.

        Raises:
            ValueError: If a seed is set but the sample has no "index" entry.
        """
        # copy to avoid changing method input
        output_sample = self.save_copy(input_sample)

        # all items share one permutation, so any of them can serve as the size reference
        reference_key = next(iter(self.items))

        generator = None
        if self.seed is not None:
            if "index" not in output_sample:
                raise ValueError("Sample index is required for deterministic point sampling with a seed.")
            # per-sample seed: deterministic, but different for every sample index
            generator = torch.Generator().manual_seed(output_sample["index"] + self.seed)

        reference = output_sample[reference_key]
        assert torch.is_tensor(reference)

        num_available = len(reference)
        shuffled = torch.randperm(num_available, generator=generator)
        anchor_idx = shuffled[: self.num_points]
        # leftover points only become queries if requested and if there are any
        if self.keep_queries and num_available > self.num_points:
            query_idx = shuffled[self.num_points :]
        else:
            query_idx = None

        for item in self.items:
            tensor = output_sample[item]
            prefix, postfix = self.to_prefix_and_postfix(item)
            output_sample[f"{prefix}_anchor_{postfix}"] = tensor[anchor_idx]
            if query_idx is not None:
                output_sample[f"{prefix}_query_{postfix}"] = tensor[query_idx]

        return output_sample
class SurfaceVolumeEvaluationMetricsCallbackConfig(PeriodicDataIteratorCallbackConfig):
    # Configuration of the SurfaceVolumeEvaluationMetricsCallback; `validate_config` enforces batch_size == 1 and
    # the options that chunked inference requires.
    name: Literal["SurfaceVolumeEvaluationMetricsCallback"] = "SurfaceVolumeEvaluationMetricsCallback"

    forward_properties: list[str] = []
    """List of properties in the dataset to be forwarded during inference."""
    chunked_inference: bool = False
    "If True, perform inference in chunks over the full simulation geometry"
    chunk_properties: list[str] = []
    """List of properties in the dataset to be chunked use for chunked. Some properties don't need to be chunked."""
    batch_size: int = Field(1)
    """Batch size for evaluation. Currently only batch_size=1 is supported."""
    chunk_size: int | None = None
    """Size of each chunk when performing chunked inference. Usually equal to the number of surface/volume points during training"""
    sample_size_property: str | None = Field(None)
    """Property in the batch to determine the sample size (i.e., the size of either the surface or volume mesh) to know how many chunks to make"""

    @model_validator(mode="after")
    def validate_config(self) -> "SurfaceVolumeEvaluationMetricsCallbackConfig":
        # Reject unsupported batch sizes first, then check the chunking options in declaration order.
        if self.batch_size != 1:
            raise ValueError("SurfaceVolumeEvaluationMetricsCallback only supports batch_size=1")
        if not self.chunked_inference:
            return self

        # (requirement satisfied?, error message) — the first unmet requirement is reported
        requirements = (
            (self.chunk_size is not None, "chunk_size must be specified when chunked_inference is True"),
            (bool(self.forward_properties), "forward_properties must be specified when chunked_inference is True"),
            (bool(self.chunk_properties), "chunk_properties must be specified when chunked_inference is True"),
        )
        for satisfied, message in requirements:
            if not satisfied:
                raise ValueError(message)
        return self
+ +from __PROJECT__.schemas.datasets import AeroDatasetConfig +from pydantic import Field + +from noether.core.schemas import ConfigSchema as ConfigSchemaBase +from noether.core.schemas.dataset import AeroDataSpecs +from noether.core.schemas.statistics import AeroStatsSchema + +from .models.any_model_config import AnyModelConfig +from .trainers.automotive_aerodynamics_trainer_config import AutomotiveAerodynamicsCfdTrainerConfig + + +class ConfigSchema(ConfigSchemaBase): + data_specs: AeroDataSpecs + model: AnyModelConfig = Field(..., discriminator="name") + trainer: AutomotiveAerodynamicsCfdTrainerConfig + datasets: dict[str, AeroDatasetConfig] + dataset_statistics: AeroStatsSchema | None = None diff --git a/src/noether/scaffold/template_files/schemas/datasets/__init__.py b/src/noether/scaffold/template_files/schemas/datasets/__init__.py new file mode 100644 index 00000000..bb928606 --- /dev/null +++ b/src/noether/scaffold/template_files/schemas/datasets/__init__.py @@ -0,0 +1,3 @@ +# Copyright © 2026 Emmi AI GmbH. All rights reserved. + +from .aero_dataset_config import AeroDatasetConfig diff --git a/src/noether/scaffold/template_files/schemas/datasets/aero_dataset_config.py b/src/noether/scaffold/template_files/schemas/datasets/aero_dataset_config.py new file mode 100644 index 00000000..a18fb0d7 --- /dev/null +++ b/src/noether/scaffold/template_files/schemas/datasets/aero_dataset_config.py @@ -0,0 +1,10 @@ +# Copyright © 2025 Emmi AI GmbH. All rights reserved. 
+ +from __PROJECT__.schemas.pipelines.aero_pipeline_config import AeroCFDPipelineConfig + +from noether.core.schemas.dataset import StandardDatasetConfig + + +class AeroDatasetConfig(StandardDatasetConfig): + pipeline: AeroCFDPipelineConfig + filter_categories: tuple[str, ...] | None = None diff --git a/src/noether/scaffold/template_files/schemas/models/ab_upt_config.py b/src/noether/scaffold/template_files/schemas/models/ab_upt_config.py new file mode 100644 index 00000000..339f8cf7 --- /dev/null +++ b/src/noether/scaffold/template_files/schemas/models/ab_upt_config.py @@ -0,0 +1,11 @@ +# Copyright © 2025 Emmi AI GmbH. All rights reserved. + +from typing import Literal + +from noether.core.schemas.models import AnchorBranchedUPTConfig + +from .base_config import BaseModelConfig + + +class ABUPTConfig(BaseModelConfig, AnchorBranchedUPTConfig): + name: Literal["ab_upt"] = "ab_upt" diff --git a/src/noether/scaffold/template_files/schemas/models/base_config.py b/src/noether/scaffold/template_files/schemas/models/base_config.py new file mode 100644 index 00000000..6a0eda16 --- /dev/null +++ b/src/noether/scaffold/template_files/schemas/models/base_config.py @@ -0,0 +1,24 @@ +# Copyright © 2025 Emmi AI GmbH. All rights reserved. + +from typing import Literal + +from pydantic import BaseModel, Field + +from noether.core.schemas.dataset import AeroDataSpecs + + +class BaseModelConfig(BaseModel): + name: str = Field(...) + """Name of the model, also used as identifier when saving/loading checkpoints and finding the correct model schema.""" + hidden_dim: int = Field(...) + """Hidden dimension of the model.""" + kind: str = Field(...) + """Kind of model to use, i.e. class path (tutorials.models.).""" + position_projection: Literal["linear", "sincos"] = "sincos" + """String to indicate the type of position projection to use. Can be "sincos" or "linear". Defaults to "sincos".""" + use_output_projection: bool = False + """Boolean to indicate to use the output projection. 
Defaults to False.""" + use_bias_layers: bool = Field(True) + """Boolean to indicate to use bias layers. Defaults to True.""" + data_specs: AeroDataSpecs + """Data specifications for the model.""" diff --git a/src/noether/scaffold/template_files/schemas/models/transformer_config.py b/src/noether/scaffold/template_files/schemas/models/transformer_config.py new file mode 100644 index 00000000..0c966e49 --- /dev/null +++ b/src/noether/scaffold/template_files/schemas/models/transformer_config.py @@ -0,0 +1,11 @@ +# Copyright © 2025 Emmi AI GmbH. All rights reserved. + +from typing import Literal + +from noether.core.schemas.models import TransformerConfig + +from .base_config import BaseModelConfig + + +class TransformerConfig(BaseModelConfig, TransformerConfig): + name: Literal["transformer"] = "transformer" diff --git a/src/noether/scaffold/template_files/schemas/models/transolver_config.py b/src/noether/scaffold/template_files/schemas/models/transolver_config.py new file mode 100644 index 00000000..267fb3bb --- /dev/null +++ b/src/noether/scaffold/template_files/schemas/models/transolver_config.py @@ -0,0 +1,13 @@ +# Copyright © 2025 Emmi AI GmbH. All rights reserved. + +from typing import Literal + +from noether.core.schemas.models import TransolverConfig + +from .base_config import BaseModelConfig + + +class TransolverConfig(BaseModelConfig, TransolverConfig): + """Configuration schema for the Transolver model.""" + + name: Literal["transolver"] = "transolver" diff --git a/src/noether/scaffold/template_files/schemas/models/upt_config.py b/src/noether/scaffold/template_files/schemas/models/upt_config.py new file mode 100644 index 00000000..15252f54 --- /dev/null +++ b/src/noether/scaffold/template_files/schemas/models/upt_config.py @@ -0,0 +1,11 @@ +# Copyright © 2025 Emmi AI GmbH. All rights reserved. 
+ +from typing import Literal + +from noether.core.schemas.models import UPTConfig + +from .base_config import BaseModelConfig + + +class UPTConfig(BaseModelConfig, UPTConfig): + name: Literal["upt"] = "upt" diff --git a/src/noether/scaffold/template_files/schemas/pipelines/__init__.py b/src/noether/scaffold/template_files/schemas/pipelines/__init__.py new file mode 100644 index 00000000..aede2f25 --- /dev/null +++ b/src/noether/scaffold/template_files/schemas/pipelines/__init__.py @@ -0,0 +1 @@ +# Copyright © 2026 Emmi AI GmbH. All rights reserved. diff --git a/src/noether/scaffold/template_files/schemas/pipelines/aero_pipeline_config.py b/src/noether/scaffold/template_files/schemas/pipelines/aero_pipeline_config.py new file mode 100644 index 00000000..ffb89dea --- /dev/null +++ b/src/noether/scaffold/template_files/schemas/pipelines/aero_pipeline_config.py @@ -0,0 +1,38 @@ +# Copyright © 2025 Emmi AI GmbH. All rights reserved. + +from pydantic import BaseModel + +from noether.core.schemas.dataset import AeroDataSpecs +from noether.core.schemas.statistics import AeroStatsSchema + + +class AeroCFDPipelineConfig(BaseModel): + kind: str + num_surface_points: int + """Number of surface points we sample as input for the model. """ + num_volume_points: int + """Number of volume points we sample as input for the model.""" + num_surface_queries: int | None = None + """ Number of surface queries we use to query the output function. Defaults to None. If set to 0, no query points are sampled.""" + num_volume_queries: int | None = None + """ Number of volume queries we use to query the output function. Defaults to None. If set to 0, no query points are sampled.""" + use_physics_features: bool = False + """ Whether to use physics features next to input coordinates (i.e., SDF and normal vectors). 
Defaults to False.""" + dataset_statistics: AeroStatsSchema | None = None + """Dataset statistics (mean, std, max, min, etc) for normalization of input features.""" + sample_query_points: bool = True + """Whether to sample query points. Defaults to True. If False, the query points are simply duplicated from the surface and volume points that serve as inputs for the encoder. This only applies for models that can query (e.g., UPT).""" + num_supernodes: int = 0 + """ Number of supernodes (for UPT). """ + num_geometry_supernodes: int | None = None + """ Number of geometry supernodes (for AB-UPT). """ + num_geometry_points: int | None = None + """ Number of geometry points to sample (for AB-UPT). """ + num_volume_anchor_points: int | None = 0 + """ Number of volume anchor points to sample for AB-UPT. Defaults to 0.""" + num_surface_anchor_points: int | None = 0 + """Number of surface anchor points to sample for AB-UPT. Defaults to 0.""" + seed: int | None = None + """Random seed for processes that involve sampling (e.g., point sampling). Defaults to None.""" + data_specs: AeroDataSpecs + """Data specifications for the pipeline.""" diff --git a/src/noether/scaffold/template_files/schemas/trainers/__init__.py b/src/noether/scaffold/template_files/schemas/trainers/__init__.py new file mode 100644 index 00000000..6fe8e458 --- /dev/null +++ b/src/noether/scaffold/template_files/schemas/trainers/__init__.py @@ -0,0 +1,3 @@ +# Copyright © 2026 Emmi AI GmbH. All rights reserved. 
+ +from .automotive_aerodynamics_trainer_config import AutomotiveAerodynamicsCfdTrainerConfig diff --git a/src/noether/scaffold/template_files/schemas/trainers/automotive_aerodynamics_trainer_config.py b/src/noether/scaffold/template_files/schemas/trainers/automotive_aerodynamics_trainer_config.py new file mode 100644 index 00000000..f7c77d4f --- /dev/null +++ b/src/noether/scaffold/template_files/schemas/trainers/automotive_aerodynamics_trainer_config.py @@ -0,0 +1,33 @@ +# Copyright © 2025 Emmi AI GmbH. All rights reserved. + +from typing import Annotated, Union + +from __PROJECT__.schemas.callbacks import ProjectCallbacksConfig +from pydantic import Field + +from noether.core.schemas.callbacks import CallbacksConfig +from noether.core.schemas.trainers import BaseTrainerConfig + +AllCallbacks = Union[ + ProjectCallbacksConfig, CallbacksConfig +] # custom callbacks need to be added here to one union type with the base noether CallbacksConfig + + +class AutomotiveAerodynamicsCfdTrainerConfig(BaseTrainerConfig): + surface_weight: float = 1.0 + """ Weight of the predicted values on the surface mesh. Defaults to 1.0..""" + volume_weight: float = 1.0 + """Weight of the predicted values in the volume. Defaults to 1.0.""" + surface_pressure_weight: float = 1.0 + """Weight of the predicted values for the surface pressure. Defaults to 1.0.""" + surface_friction_weight: float = 0.0 + """Weight of the predicted values for the surface wall shear stress. Defaults to 0.0.""" + volume_velocity_weight: float = 1.0 + """Weight of the predicted values for the volume velocity. Defaults to 1.0.""" + volume_pressure_weight: float = 0.0 + """Weight of the predicted values for the volume total pressure coefficient. Defaults to 0.0.""" + volume_vorticity_weight: float = 0.0 + """Weight of the predicted values for the volume vorticity. 
Defaults to 0.0.""" + use_physics_features: bool = False + """ If true, additional features are used next to the input coordinates (i.e., SDF, surface normals, etc.). Defaults to False.""" + callbacks: list[Annotated[AllCallbacks, Field(discriminator="name")]] | None = None diff --git a/src/noether/scaffold/template_files/trainers/__init__.py b/src/noether/scaffold/template_files/trainers/__init__.py new file mode 100644 index 00000000..12066171 --- /dev/null +++ b/src/noether/scaffold/template_files/trainers/__init__.py @@ -0,0 +1,3 @@ +# Copyright © 2026 Emmi AI GmbH. All rights reserved. + +from .automotive_aerodynamics_cfd import AutomotiveAerodynamicsCFDTrainer diff --git a/src/noether/scaffold/template_files/trainers/automotive_aerodynamics_cfd.py b/src/noether/scaffold/template_files/trainers/automotive_aerodynamics_cfd.py new file mode 100644 index 00000000..97dd4424 --- /dev/null +++ b/src/noether/scaffold/template_files/trainers/automotive_aerodynamics_cfd.py @@ -0,0 +1,85 @@ +# Copyright © 2025 Emmi AI GmbH. All rights reserved. + +from __future__ import annotations + +import torch +import torch.nn.functional as F +from __PROJECT__.schemas.trainers import AutomotiveAerodynamicsCfdTrainerConfig + +from noether.training.trainers import BaseTrainer + + +class AutomotiveAerodynamicsCFDTrainer(BaseTrainer): + """Trainer for automotive aerodynamics CFD on the AhmedML, DrivaerML and Shapenet-Car datasets.""" + + def __init__(self, trainer_config: AutomotiveAerodynamicsCfdTrainerConfig, **kwargs): + """Trainer for automotive aerodynamics CFD on the AhmedML, DrivaerML and Shapenet-Car datasets. + + Args: + trainer_config: Configuration for the trainer. + **kwargs: Additional keyword arguments for the BaseTrainer. + + Raises: + ValueError: When an output mode is not defined in the loss items. 
+ """ + super().__init__( + config=trainer_config, + **kwargs, + ) + + self.surface_pressure_weight = trainer_config.surface_pressure_weight + self.surface_friction_weight = trainer_config.surface_friction_weight + self.volume_velocity_weight = trainer_config.volume_velocity_weight + self.volume_pressure_weight = trainer_config.volume_pressure_weight + self.volume_vorticity_weight = trainer_config.volume_vorticity_weight + + self.surface_weight = trainer_config.surface_weight + self.volume_weight = trainer_config.volume_weight + + loss_items = { + "surface_pressure": (self.surface_pressure_weight, self.surface_weight), + "surface_friction": ( + self.surface_friction_weight, + self.surface_weight, + ), # not used for ShapeNet-Car + "volume_velocity": (self.volume_velocity_weight, self.volume_weight), + "volume_pressure": (self.volume_pressure_weight, self.volume_weight), # not used for ShapeNet-Car + "volume_vorticity": (self.volume_vorticity_weight, self.volume_weight), # not used for ShapeNet-Car + } + + self.loss_items = [] + for target_property in self.target_properties: + if target_property[: -len("_target")] not in loss_items: + raise ValueError(f"Output mode '{target_property}' is not defined in loss items.") + self.loss_items.append( + ( + target_property[: -len("_target")], + loss_items[target_property[: -len("_target")]][0], + loss_items[target_property[: -len("_target")]][1], + ) + ) + + def loss_compute( + self, forward_output: dict[str, torch.Tensor], targets: dict[str, torch.Tensor] + ) -> dict[str, torch.Tensor]: + """Given the output of the model and the targets, compute the losses. + Args: + forward_output The output of the model, containing the predictions for each output mode. + targets: Dict containing all target values to compute the loss. + + Returns: + A dictionary containing the computed losses for each output mode. 
+ """ + losses: dict[str, torch.Tensor] = {} + for item, weight, group_weight in self.loss_items: + if weight > 0 and group_weight > 0 and item in forward_output: + if f"{item}_target" not in targets: + raise ValueError( + f"Target for '{item}' not found in targets. Ensure the targets contain the correct keys." + ) + losses[f"{item}_loss"] = ( + F.mse_loss(targets[f"{item}_target"], forward_output[item]) * weight * group_weight + ) + if len(losses) == 0: + raise ValueError("No losses computed, check your output keys and loss function.") + return losses diff --git a/tests/test_scaffold.py b/tests/test_scaffold.py new file mode 100644 index 00000000..6285d9c8 --- /dev/null +++ b/tests/test_scaffold.py @@ -0,0 +1,127 @@ +# Copyright © 2025 Emmi AI GmbH. All rights reserved. + +from __future__ import annotations + +import itertools +from pathlib import Path + +import pytest +import yaml + +from noether.scaffold.choices import DatasetChoice, HardwareChoice, ModelChoice, OptimizerChoice, TrackerChoice +from noether.scaffold.config import resolve_config +from noether.scaffold.generator import generate_project + +MODELS = list(ModelChoice) +# Generate a representative subset: each model with a shapenet and a caeml dataset +COMBOS = list(itertools.product(MODELS, [DatasetChoice.SHAPENET_CAR, DatasetChoice.AHMEDML])) + + +@pytest.mark.parametrize(("model", "dataset"), COMBOS, ids=[f"{m.value}-{d.value}" for m, d in COMBOS]) +def test_generate_project(tmp_path: Path, model: ModelChoice, dataset: DatasetChoice) -> None: + project_name = "test_proj" + proj = tmp_path / project_name + config = resolve_config( + project_name=project_name, + model=model, + dataset=dataset, + dataset_path="/tmp/fake_data", + optimizer=OptimizerChoice.ADAMW, + tracker=TrackerChoice.DISABLED, + hardware=HardwareChoice.GPU, + project_dir=proj, + wandb_entity=None, + ) + + generate_project(config) + + # All expected directories exist + assert (proj / "configs").is_dir() + assert (proj / 
"model").is_dir() + assert (proj / "schemas").is_dir() + + # No leftover tutorial references in generated .py or .yaml files + for ext in ("*.py", "*.yaml"): + for f in proj.rglob(ext): + content = f.read_text() + assert "tutorial." not in content, f"Found 'tutorial.' in {f.relative_to(proj)}" + + # All YAML files parse without error + for yf in proj.rglob("*.yaml"): + content = yf.read_text() + if not content.strip(): + continue + # Strip Hydra directives before parsing + lines = [line for line in content.splitlines() if not line.startswith("# @package")] + try: + yaml.safe_load("\n".join(lines)) + except yaml.YAMLError as e: + pytest.fail(f"YAML parse error in {yf.relative_to(proj)}: {e}") + + # All kind: values start with project name, a known framework prefix, or are Hydra interpolations + known_prefixes = (f"{project_name}.", "noether.", "torch.", "${") + for yf in proj.rglob("*.yaml"): + content = yf.read_text() + lines = content.splitlines() + for line in lines: + stripped = line.strip() + if stripped.startswith("kind:"): + kind_value = stripped.split(":", 1)[1].strip().strip("'\"") + if kind_value: + assert any(kind_value.startswith(p) for p in known_prefixes), ( + f"Unexpected kind '{kind_value}' in {yf.relative_to(proj)}" + ) + + # No unresolved placeholders + for ext in ("*.py", "*.yaml"): + for f in proj.rglob(ext): + content = f.read_text() + assert "__PROJECT__" not in content, f"Unresolved __PROJECT__ in {f.relative_to(proj)}" + assert "__CLASS__" not in content, f"Unresolved __CLASS__ in {f.relative_to(proj)}" + assert "__DATASET_PATH__" not in content, f"Unresolved __DATASET_PATH__ in {f.relative_to(proj)}" + assert "__OPTIMIZER__" not in content, f"Unresolved __OPTIMIZER__ in {f.relative_to(proj)}" + assert "__TRACKER__" not in content, f"Unresolved __TRACKER__ in {f.relative_to(proj)}" + + +def test_hardware_mps_sets_accelerator(tmp_path: Path) -> None: + """Non-default hardware should write accelerator to train.yaml.""" + config = 
resolve_config( + project_name="mps_test", + model=ModelChoice.UPT, + dataset=DatasetChoice.SHAPENET_CAR, + dataset_path="/tmp/fake", + optimizer=OptimizerChoice.ADAMW, + tracker=TrackerChoice.DISABLED, + hardware=HardwareChoice.MPS, + project_dir=tmp_path / "mps_test", + wandb_entity=None, + ) + + generate_project(config) + + train_yaml = tmp_path / "mps_test" / "configs" / "train.yaml" + lines = [line for line in train_yaml.read_text().splitlines() if not line.startswith("# @package")] + data = yaml.safe_load("\n".join(lines)) + assert data.get("accelerator") == "mps" + + +def test_gpu_default_no_accelerator(tmp_path: Path) -> None: + """Default GPU hardware should not write accelerator key.""" + config = resolve_config( + project_name="gpu_test", + model=ModelChoice.UPT, + dataset=DatasetChoice.SHAPENET_CAR, + dataset_path="/tmp/fake", + optimizer=OptimizerChoice.ADAMW, + tracker=TrackerChoice.DISABLED, + hardware=HardwareChoice.GPU, + project_dir=tmp_path / "gpu_test", + wandb_entity=None, + ) + + generate_project(config) + + train_yaml = tmp_path / "gpu_test" / "configs" / "train.yaml" + lines = [line for line in train_yaml.read_text().splitlines() if not line.startswith("# @package")] + data = yaml.safe_load("\n".join(lines)) + assert "accelerator" not in data From 86a27d7b97881aa9d4838ae51523b74f9da458c7 Mon Sep 17 00:00:00 2001 From: David Hauser Date: Wed, 11 Mar 2026 10:04:50 +0100 Subject: [PATCH 02/12] Address PR feedback --- src/noether/scaffold/cli.py | 50 +++--- src/noether/scaffold/file_copier.py | 225 -------------------------- src/noether/scaffold/file_manager.py | 226 +++++++++++++++++++++++++++ src/noether/scaffold/generator.py | 8 +- 4 files changed, 254 insertions(+), 255 deletions(-) delete mode 100644 src/noether/scaffold/file_copier.py create mode 100644 src/noether/scaffold/file_manager.py diff --git a/src/noether/scaffold/cli.py b/src/noether/scaffold/cli.py index d9ecf924..b233ab08 100644 --- a/src/noether/scaffold/cli.py +++ 
b/src/noether/scaffold/cli.py @@ -18,7 +18,12 @@ @app.command() def main( - project_name: Annotated[str, typer.Argument(help="Project name (valid Python identifier)")], + project_name: Annotated[ + str, + typer.Argument( + help="Project name (valid Python identifier). Examples: 'my_project', 'MyProject1'. No hyphens allowed." + ), + ], model: Annotated[ModelChoice, typer.Option("--model", "-m", help="Model architecture")] = ..., # type: ignore[assignment] dataset: Annotated[DatasetChoice, typer.Option("--dataset", "-d", help="Dataset")] = ..., # type: ignore[assignment] dataset_path: Annotated[str, typer.Option("--dataset-path", help="Path to dataset")] = ..., # type: ignore[assignment] @@ -29,7 +34,7 @@ def main( hardware: Annotated[HardwareChoice, typer.Option("--hardware", help="Hardware target")] = HardwareChoice.GPU, project_dir: Annotated[Path, typer.Option("--project-dir", "-l", help="Where to create project dir")] = Path("."), wandb_entity: Annotated[ - str | None, typer.Option("--wandb-entity", help="W&B entity (required if tracker=wandb)") + str | None, typer.Option("--wandb-entity", help="W&B entity, e.g. 
'my-team' (defaults to your W&B username)") ] = None, ) -> None: """Scaffold a new Noether training project.""" @@ -38,11 +43,6 @@ def main( typer.echo(f"Error: '{project_name}' is not a valid Python identifier.", err=True) raise typer.Exit(1) - # Validate if wandb has entity set - if tracker == TrackerChoice.WANDB and not wandb_entity: - typer.echo("Error: --wandb-entity is required when --tracker=wandb.", err=True) - raise typer.Exit(1) - # Resolve to absolute path project_dir = (project_dir / project_name).resolve() @@ -73,26 +73,24 @@ def main( def _print_summary(config: ScaffoldConfig) -> None: - typer.echo("") - typer.echo("Project created successfully!") - typer.echo("") - typer.echo("Configuration:") - typer.echo(f" Project: {config.project_name}") - typer.echo(f" Model: {config.model.value}") - typer.echo(f" Dataset: {config.dataset.value}") - typer.echo(f" Optimizer: {config.optimizer.value}") - typer.echo(f" Tracker: {config.tracker.value}") - typer.echo(f" Hardware: {config.hardware.value}") - typer.echo(f" Path: {config.project_dir}") - typer.echo("") - + typer.echo( + "\nProject created successfully!\n" + "Configuration:\n" + f" Project: {config.project_name}\n" + f" Model: {config.model.value}\n" + f" Dataset: {config.dataset.value}\n" + f" Optimizer: {config.optimizer.value}\n" + f" Tracker: {config.tracker.value}\n" + f" Hardware: {config.hardware.value}\n" + f" Path: {config.project_dir}\n" + ) # Suggest run command - typer.echo("To train, run:") - typer.echo(f" uv run noether-train --config-dir {config.project_dir}/configs \\") - typer.echo(f" --config-name train +experiment={config.model.value}") - typer.echo("") - typer.echo("Experiment configs for all models are in configs/experiment/.") - typer.echo("") + typer.echo( + "To train, run:\n" + f" uv run noether-train --config-dir {config.project_dir}/configs \\\n" + f" --config-name train +experiment={config.model.value}\n\n" + "Experiment configs for all models are in configs/experiment/." 
+ ) if __name__ == "__main__": diff --git a/src/noether/scaffold/file_copier.py b/src/noether/scaffold/file_copier.py deleted file mode 100644 index 3e80bf16..00000000 --- a/src/noether/scaffold/file_copier.py +++ /dev/null @@ -1,225 +0,0 @@ -# Copyright © 2025 Emmi AI GmbH. All rights reserved. - -from __future__ import annotations - -from importlib.resources.abc import Traversable -from pathlib import Path - -from .choices import HardwareChoice, ModelChoice -from .config import TEMPLATES, ScaffoldConfig, substitute - - -def _write(path: Path, content: str) -> None: - """Write *content* to *path*, creating parent directories as needed.""" - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(content) - - -def _copy_template_with_substitution( - template_file: Traversable, destination_path: Path, config: ScaffoldConfig -) -> None: - """Copy a template file with placeholder substitution.""" - destination_path.parent.mkdir(parents=True, exist_ok=True) - content = template_file.read_text() - destination_path.write_text(substitute(content, config)) - - -def _copy_verbatim(template_file: Traversable, destination_path: Path) -> None: - """Copy a template file verbatim (no substitution).""" - destination_path.parent.mkdir(parents=True, exist_ok=True) - destination_path.write_text(template_file.read_text()) - - -def copy_python_files(config: ScaffoldConfig) -> None: - """Copy template Python files into the new project with substitutions.""" - tpl = TEMPLATES - project_dir = config.project_dir - - # --- Model files (model-specific) --- - _copy_template_with_substitution(tpl / "model" / "base.py", project_dir / "model" / "base.py", config) - _copy_template_with_substitution( - tpl / "schemas" / "models" / "base_config.py", - project_dir / "schemas" / "models" / "base_config.py", - config, - ) - _copy_template_with_substitution( - tpl / "model" / f"{config.model.module_name}.py", - project_dir / "model" / f"{config.model.module_name}.py", - config, - ) - 
_copy_template_with_substitution( - tpl / "schemas" / "models" / f"{config.model.schema_module}.py", - project_dir / "schemas" / "models" / f"{config.model.schema_module}.py", - config, - ) - - # --- Infrastructure files (with __PROJECT__ substitution) --- - _copy_template_with_substitution(tpl / "pipeline" / "__init__.py", project_dir / "pipeline" / "__init__.py", config) - _copy_template_with_substitution( - tpl / "pipeline" / "collators" / "__init__.py", - project_dir / "pipeline" / "collators" / "__init__.py", - config, - ) - _copy_template_with_substitution( - tpl / "pipeline" / "collators" / "sparse_tensor_offset.py", - project_dir / "pipeline" / "collators" / "sparse_tensor_offset.py", - config, - ) - _copy_template_with_substitution( - tpl / "pipeline" / "multistage_pipelines" / "__init__.py", - project_dir / "pipeline" / "multistage_pipelines" / "__init__.py", - config, - ) - _copy_template_with_substitution( - tpl / "pipeline" / "multistage_pipelines" / "aero_multistage.py", - project_dir / "pipeline" / "multistage_pipelines" / "aero_multistage.py", - config, - ) - _copy_template_with_substitution( - tpl / "pipeline" / "sample_processors" / "__init__.py", - project_dir / "pipeline" / "sample_processors" / "__init__.py", - config, - ) - _copy_template_with_substitution( - tpl / "pipeline" / "sample_processors" / "anchor_point_sampling.py", - project_dir / "pipeline" / "sample_processors" / "anchor_point_sampling.py", - config, - ) - _copy_template_with_substitution( - tpl / "trainers" / "automotive_aerodynamics_cfd.py", - project_dir / "trainers" / "automotive_aerodynamics_cfd.py", - config, - ) - _copy_template_with_substitution( - tpl / "callbacks" / "surface_volume_evaluation_metrics.py", - project_dir / "callbacks" / "surface_volume_evaluation_metrics.py", - config, - ) - _copy_template_with_substitution( - tpl / "schemas" / "datasets" / "aero_dataset_config.py", - project_dir / "schemas" / "datasets" / "aero_dataset_config.py", - config, - ) - 
_copy_template_with_substitution( - tpl / "schemas" / "pipelines" / "aero_pipeline_config.py", - project_dir / "schemas" / "pipelines" / "aero_pipeline_config.py", - config, - ) - _copy_template_with_substitution( - tpl / "schemas" / "trainers" / "automotive_aerodynamics_trainer_config.py", - project_dir / "schemas" / "trainers" / "automotive_aerodynamics_trainer_config.py", - config, - ) - _copy_template_with_substitution( - tpl / "schemas" / "callbacks" / "callback_config.py", - project_dir / "schemas" / "callbacks" / "callback_config.py", - config, - ) - _copy_template_with_substitution( - tpl / "schemas" / "config_schema.py", project_dir / "schemas" / "config_schema.py", config - ) - - # --- Static init files (verbatim copies) --- - _copy_verbatim(tpl / "callbacks" / "__init__.py", project_dir / "callbacks" / "__init__.py") - _copy_verbatim(tpl / "trainers" / "__init__.py", project_dir / "trainers" / "__init__.py") - _copy_verbatim(tpl / "schemas" / "__init__.py", project_dir / "schemas" / "__init__.py") - _copy_verbatim(tpl / "schemas" / "datasets" / "__init__.py", project_dir / "schemas" / "datasets" / "__init__.py") - _copy_verbatim(tpl / "schemas" / "pipelines" / "__init__.py", project_dir / "schemas" / "pipelines" / "__init__.py") - _copy_verbatim(tpl / "schemas" / "trainers" / "__init__.py", project_dir / "schemas" / "trainers" / "__init__.py") - _copy_verbatim(tpl / "schemas" / "callbacks" / "__init__.py", project_dir / "schemas" / "callbacks" / "__init__.py") - - -def generate_python_files(config: ScaffoldConfig) -> None: - """Generate dynamic Python files that depend on model choice.""" - proj = config.project_dir - - # --- Empty __init__.py files --- - _write(proj / "__init__.py", "") - _write(proj / "configs" / "__init__.py", "") - - # --- schemas/models/any_model_config.py (depends on model choice) --- - cfg_cls = config.model.config_class_name - schema_mod = config.model.schema_module - _write( - proj / "schemas" / "models" / "any_model_config.py", 
- f"from typing import Union\n\nfrom .{schema_mod} import {cfg_cls}\n\nAnyModelConfig = Union[{cfg_cls}]\n", - ) - - # --- schemas/models/__init__.py (depends on model choice) --- - _write( - proj / "schemas" / "models" / "__init__.py", - f"from .{config.model.schema_module} import {config.model.config_class_name}\n", - ) - - # --- model/__init__.py (depends on model choice) --- - _write( - proj / "model" / "__init__.py", - f"from .{config.model.module_name} import {config.model.class_name}\n", - ) - - -def copy_yaml_configs(config: ScaffoldConfig) -> None: - """Copy all YAML config files into the new project.""" - tpl = TEMPLATES / "configs" - dst = config.project_dir / "configs" - ref = config.reference - - # --- Verbatim copies (data_specs, normalizers, statistics, datasets, optimizer) --- - verbatim = [ - ("data_specs", ref.get("data_specs_file")), - ("dataset_normalizers", ref.get("normalizers_file")), - ("dataset_statistics", ref.get("statistics_file")), - ("datasets", ref.get("dataset_config_file")), - ] - for subdir, filename in verbatim: - if filename: - _copy_verbatim(tpl / subdir / f"{filename}.yaml", dst / subdir / f"{filename}.yaml") - - _copy_verbatim( - tpl / "optimizer" / f"{config.optimizer.value}.yaml", dst / "optimizer" / f"{config.optimizer.value}.yaml" - ) - - # --- With substitution (model, pipeline, trainer, callbacks, tracker, train) --- - _copy_template_with_substitution( - tpl / "model" / f"{config.model.value}.yaml", dst / "model" / f"{config.model.value}.yaml", config - ) - - pipeline_file = ref.get("pipeline_file") - if pipeline_file: - _copy_template_with_substitution( - tpl / "pipeline" / f"{pipeline_file}.yaml", dst / "pipeline" / f"{pipeline_file}.yaml", config - ) - - trainer_file = ref.get("trainer_config_file") - if trainer_file: - _copy_template_with_substitution( - tpl / "trainer" / f"{trainer_file}.yaml", dst / "trainer" / f"{trainer_file}.yaml", config - ) - - callbacks_file = ref.get("callbacks_file") - if callbacks_file: - 
_copy_template_with_substitution( - tpl / "callbacks" / f"{callbacks_file}.yaml", dst / "callbacks" / f"{callbacks_file}.yaml", config - ) - - _copy_template_with_substitution( - tpl / "tracker" / f"{config.tracker.value}.yaml", dst / "tracker" / f"{config.tracker.value}.yaml", config - ) - - # --- Train YAML (per-dataset template) --- - _copy_template_with_substitution(tpl / f"train_{config.dataset.value}.yaml", dst / "train.yaml", config) - - # Append accelerator for non-GPU hardware - if config.hardware != HardwareChoice.GPU: - train_path = dst / "train.yaml" - content = train_path.read_text() - train_path.write_text(content + f"accelerator: {config.hardware.value}\n") - - # --- Experiment configs (all 4 models for the dataset's category) --- - category = ref.get("experiment_category", "shapenet") - for model in ModelChoice: - _copy_template_with_substitution( - tpl / "experiment" / category / f"{model.value}.yaml", - dst / "experiment" / f"{model.value}.yaml", - config, - ) diff --git a/src/noether/scaffold/file_manager.py b/src/noether/scaffold/file_manager.py new file mode 100644 index 00000000..401ff590 --- /dev/null +++ b/src/noether/scaffold/file_manager.py @@ -0,0 +1,226 @@ +# Copyright © 2025 Emmi AI GmbH. All rights reserved. 
+ +from __future__ import annotations + +from importlib.resources.abc import Traversable +from pathlib import Path + +from .choices import HardwareChoice, ModelChoice +from .config import TEMPLATES, ScaffoldConfig, substitute + + +class FileManager: + """Manages file operations for project scaffolding.""" + + @staticmethod + def _write(path: Path, content: str) -> None: + """Write *content* to *path*, creating parent directories as needed.""" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content) + + @staticmethod + def _copy_template_with_substitution( + template_file: Traversable, destination_path: Path, config: ScaffoldConfig + ) -> None: + """Copy a template file with placeholder substitution.""" + destination_path.parent.mkdir(parents=True, exist_ok=True) + content = template_file.read_text() + destination_path.write_text(substitute(content, config)) + + @staticmethod + def _copy_verbatim(template_file: Traversable, destination_path: Path) -> None: + """Copy a template file verbatim (no substitution).""" + destination_path.parent.mkdir(parents=True, exist_ok=True) + destination_path.write_text(template_file.read_text()) + + @staticmethod + def copy_python_files(config: ScaffoldConfig) -> None: + """Copy template Python files into the new project with substitutions.""" + tpl = TEMPLATES + project_dir = config.project_dir + _copy = FileManager._copy_template_with_substitution + _verbatim = FileManager._copy_verbatim + + # --- Model files (model-specific) --- + _copy(tpl / "model" / "base.py", project_dir / "model" / "base.py", config) + _copy( + tpl / "schemas" / "models" / "base_config.py", + project_dir / "schemas" / "models" / "base_config.py", + config, + ) + _copy( + tpl / "model" / f"{config.model.module_name}.py", + project_dir / "model" / f"{config.model.module_name}.py", + config, + ) + _copy( + tpl / "schemas" / "models" / f"{config.model.schema_module}.py", + project_dir / "schemas" / "models" / f"{config.model.schema_module}.py", 
+ config, + ) + + # --- Infrastructure files (with __PROJECT__ substitution) --- + _copy(tpl / "pipeline" / "__init__.py", project_dir / "pipeline" / "__init__.py", config) + _copy( + tpl / "pipeline" / "collators" / "__init__.py", + project_dir / "pipeline" / "collators" / "__init__.py", + config, + ) + _copy( + tpl / "pipeline" / "collators" / "sparse_tensor_offset.py", + project_dir / "pipeline" / "collators" / "sparse_tensor_offset.py", + config, + ) + _copy( + tpl / "pipeline" / "multistage_pipelines" / "__init__.py", + project_dir / "pipeline" / "multistage_pipelines" / "__init__.py", + config, + ) + _copy( + tpl / "pipeline" / "multistage_pipelines" / "aero_multistage.py", + project_dir / "pipeline" / "multistage_pipelines" / "aero_multistage.py", + config, + ) + _copy( + tpl / "pipeline" / "sample_processors" / "__init__.py", + project_dir / "pipeline" / "sample_processors" / "__init__.py", + config, + ) + _copy( + tpl / "pipeline" / "sample_processors" / "anchor_point_sampling.py", + project_dir / "pipeline" / "sample_processors" / "anchor_point_sampling.py", + config, + ) + _copy( + tpl / "trainers" / "automotive_aerodynamics_cfd.py", + project_dir / "trainers" / "automotive_aerodynamics_cfd.py", + config, + ) + _copy( + tpl / "callbacks" / "surface_volume_evaluation_metrics.py", + project_dir / "callbacks" / "surface_volume_evaluation_metrics.py", + config, + ) + _copy( + tpl / "schemas" / "datasets" / "aero_dataset_config.py", + project_dir / "schemas" / "datasets" / "aero_dataset_config.py", + config, + ) + _copy( + tpl / "schemas" / "pipelines" / "aero_pipeline_config.py", + project_dir / "schemas" / "pipelines" / "aero_pipeline_config.py", + config, + ) + _copy( + tpl / "schemas" / "trainers" / "automotive_aerodynamics_trainer_config.py", + project_dir / "schemas" / "trainers" / "automotive_aerodynamics_trainer_config.py", + config, + ) + _copy( + tpl / "schemas" / "callbacks" / "callback_config.py", + project_dir / "schemas" / "callbacks" / 
"callback_config.py", + config, + ) + _copy(tpl / "schemas" / "config_schema.py", project_dir / "schemas" / "config_schema.py", config) + + # --- Static init files (verbatim copies) --- + _verbatim(tpl / "callbacks" / "__init__.py", project_dir / "callbacks" / "__init__.py") + _verbatim(tpl / "trainers" / "__init__.py", project_dir / "trainers" / "__init__.py") + _verbatim(tpl / "schemas" / "__init__.py", project_dir / "schemas" / "__init__.py") + _verbatim(tpl / "schemas" / "datasets" / "__init__.py", project_dir / "schemas" / "datasets" / "__init__.py") + _verbatim(tpl / "schemas" / "pipelines" / "__init__.py", project_dir / "schemas" / "pipelines" / "__init__.py") + _verbatim(tpl / "schemas" / "trainers" / "__init__.py", project_dir / "schemas" / "trainers" / "__init__.py") + _verbatim(tpl / "schemas" / "callbacks" / "__init__.py", project_dir / "schemas" / "callbacks" / "__init__.py") + + @staticmethod + def generate_python_files(config: ScaffoldConfig) -> None: + """Generate dynamic Python files that depend on model choice.""" + proj = config.project_dir + _write = FileManager._write + + # --- Empty __init__.py files --- + _write(proj / "__init__.py", "") + _write(proj / "configs" / "__init__.py", "") + + # --- schemas/models/any_model_config.py (depends on model choice) --- + cfg_cls = config.model.config_class_name + schema_mod = config.model.schema_module + _write( + proj / "schemas" / "models" / "any_model_config.py", + f"from typing import Union\n\nfrom .{schema_mod} import {cfg_cls}\n\nAnyModelConfig = Union[{cfg_cls}]\n", + ) + + # --- schemas/models/__init__.py (depends on model choice) --- + _write( + proj / "schemas" / "models" / "__init__.py", + f"from .{config.model.schema_module} import {config.model.config_class_name}\n", + ) + + # --- model/__init__.py (depends on model choice) --- + _write( + proj / "model" / "__init__.py", + f"from .{config.model.module_name} import {config.model.class_name}\n", + ) + + @staticmethod + def 
copy_yaml_configs(config: ScaffoldConfig) -> None: + """Copy all YAML config files into the new project.""" + tpl = TEMPLATES / "configs" + dst = config.project_dir / "configs" + ref = config.reference + _copy = FileManager._copy_template_with_substitution + _verbatim = FileManager._copy_verbatim + + # --- Verbatim copies (data_specs, normalizers, statistics, datasets, optimizer) --- + verbatim = [ + ("data_specs", ref.get("data_specs_file")), + ("dataset_normalizers", ref.get("normalizers_file")), + ("dataset_statistics", ref.get("statistics_file")), + ("datasets", ref.get("dataset_config_file")), + ] + for subdir, filename in verbatim: + if filename: + _verbatim(tpl / subdir / f"{filename}.yaml", dst / subdir / f"{filename}.yaml") + + _verbatim( + tpl / "optimizer" / f"{config.optimizer.value}.yaml", dst / "optimizer" / f"{config.optimizer.value}.yaml" + ) + + # --- With substitution (model, pipeline, trainer, callbacks, tracker, train) --- + _copy(tpl / "model" / f"{config.model.value}.yaml", dst / "model" / f"{config.model.value}.yaml", config) + + pipeline_file = ref.get("pipeline_file") + if pipeline_file: + _copy(tpl / "pipeline" / f"{pipeline_file}.yaml", dst / "pipeline" / f"{pipeline_file}.yaml", config) + + trainer_file = ref.get("trainer_config_file") + if trainer_file: + _copy(tpl / "trainer" / f"{trainer_file}.yaml", dst / "trainer" / f"{trainer_file}.yaml", config) + + callbacks_file = ref.get("callbacks_file") + if callbacks_file: + _copy(tpl / "callbacks" / f"{callbacks_file}.yaml", dst / "callbacks" / f"{callbacks_file}.yaml", config) + + _copy( + tpl / "tracker" / f"{config.tracker.value}.yaml", + dst / "tracker" / f"{config.tracker.value}.yaml", + config, + ) + + # --- Train YAML (per-dataset template) --- + _copy(tpl / f"train_{config.dataset.value}.yaml", dst / "train.yaml", config) + + # Append accelerator for non-GPU hardware + if config.hardware != HardwareChoice.GPU: + train_path = dst / "train.yaml" + content = train_path.read_text() + 
train_path.write_text(content + f"accelerator: {config.hardware.value}\n") + + # --- Experiment configs (all 4 models for the dataset's category) --- + category = ref.get("experiment_category", "shapenet") + for model in ModelChoice: + _copy( + tpl / "experiment" / category / f"{model.value}.yaml", + dst / "experiment" / f"{model.value}.yaml", + config, + ) diff --git a/src/noether/scaffold/generator.py b/src/noether/scaffold/generator.py index c572ec48..c535c091 100644 --- a/src/noether/scaffold/generator.py +++ b/src/noether/scaffold/generator.py @@ -3,11 +3,11 @@ from __future__ import annotations from .config import ScaffoldConfig -from .file_copier import copy_python_files, copy_yaml_configs, generate_python_files +from .file_manager import FileManager def generate_project(config: ScaffoldConfig) -> None: """Orchestrate full project generation.""" - copy_python_files(config) - generate_python_files(config) - copy_yaml_configs(config) + FileManager.copy_python_files(config) + FileManager.generate_python_files(config) + FileManager.copy_yaml_configs(config) From dab3d65de09bf0351b855de94159adc207cf5aa8 Mon Sep 17 00:00:00 2001 From: David Hauser Date: Wed, 11 Mar 2026 10:48:09 +0100 Subject: [PATCH 03/12] fix printing noether-train command without line break / --- src/noether/scaffold/cli.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/noether/scaffold/cli.py b/src/noether/scaffold/cli.py index b233ab08..876f822b 100644 --- a/src/noether/scaffold/cli.py +++ b/src/noether/scaffold/cli.py @@ -87,8 +87,7 @@ def _print_summary(config: ScaffoldConfig) -> None: # Suggest run command typer.echo( "To train, run:\n" - f" uv run noether-train --config-dir {config.project_dir}/configs \\ \n" - f" --config-name train +experiment={config.model.value}\n\n" + f" uv run noether-train --config-dir {config.project_dir}/configs --config-name train +experiment={config.model.value}\n\n" "Experiment configs for all models are in configs/experiment/." 
) From 30a90f31f0c27d4084a04935566768e6ec35cbb3 Mon Sep 17 00:00:00 2001 From: David Hauser Date: Wed, 11 Mar 2026 11:26:58 +0100 Subject: [PATCH 04/12] fix: adapt to breaking changes from PR#110 --- .../template_files/configs/model/transformer.yaml | 7 ++++--- .../template_files/configs/model/transolver.yaml | 5 +++-- .../scaffold/template_files/model/transformer.py | 9 +++------ .../template_files/schemas/models/base_config.py | 2 -- .../schemas/models/transformer_config.py | 12 ++++++++++++ 5 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/noether/scaffold/template_files/configs/model/transformer.yaml b/src/noether/scaffold/template_files/configs/model/transformer.yaml index 2d953eab..695fcd84 100644 --- a/src/noether/scaffold/template_files/configs/model/transformer.yaml +++ b/src/noether/scaffold/template_files/configs/model/transformer.yaml @@ -1,12 +1,13 @@ kind: __PROJECT__.model.Transformer name: transformer hidden_dim: 192 +transformer_block_config: + num_heads: 3 + mlp_expansion_factor: 4 + use_rope: true depth: 12 -num_heads: 3 -mlp_expansion_factor: 4 optimizer_config: ${optimizer} use_output_projection: true -use_rope: true data_specs: ${data_specs} forward_properties: - surface_position diff --git a/src/noether/scaffold/template_files/configs/model/transolver.yaml b/src/noether/scaffold/template_files/configs/model/transolver.yaml index 5d48f1f3..2e31d1ab 100644 --- a/src/noether/scaffold/template_files/configs/model/transolver.yaml +++ b/src/noether/scaffold/template_files/configs/model/transolver.yaml @@ -2,10 +2,11 @@ kind: __PROJECT__.model.Transolver name: transolver hidden_dim: 192 depth: 12 -num_heads: 3 +transformer_block_config: + mlp_expansion_factor: 4 + num_heads: 3 attention_arguments: num_slices: 512 -mlp_expansion_factor: 4 use_output_projection: true optimizer_config: ${optimizer} data_specs: ${data_specs} diff --git a/src/noether/scaffold/template_files/model/transformer.py 
b/src/noether/scaffold/template_files/model/transformer.py index 37db5858..71a2d2f4 100644 --- a/src/noether/scaffold/template_files/model/transformer.py +++ b/src/noether/scaffold/template_files/model/transformer.py @@ -2,7 +2,6 @@ import torch from __PROJECT__.schemas.models.transformer_config import TransformerConfig -from noether.core.schemas.modules.layers import ContinuousSincosEmbeddingConfig, RopeFrequencyConfig from noether.modeling.models import Transformer as TransformerBackbone from noether.modeling.modules.layers import ContinuousSincosEmbed, RopeFrequency @@ -27,14 +26,12 @@ def __init__( """ super().__init__(model_config=model_config, **kwargs) - self.encoder = ContinuousSincosEmbed( - config=ContinuousSincosEmbeddingConfig(hidden_dim=model_config.hidden_dim, input_dim=3) - ) + self.encoder = ContinuousSincosEmbed(config=model_config.pos_encoding_config) - self.use_rope = model_config.use_rope + self.use_rope = model_config.transformer_block_config.use_rope self.rope = ( RopeFrequency( - config=RopeFrequencyConfig(hidden_dim=model_config.hidden_dim // model_config.num_heads, input_dim=3) + config=model_config.rope_frequency_config # type: ignore[union-attr] ) if self.use_rope else None diff --git a/src/noether/scaffold/template_files/schemas/models/base_config.py b/src/noether/scaffold/template_files/schemas/models/base_config.py index 6a0eda16..8da805ab 100644 --- a/src/noether/scaffold/template_files/schemas/models/base_config.py +++ b/src/noether/scaffold/template_files/schemas/models/base_config.py @@ -10,8 +10,6 @@ class BaseModelConfig(BaseModel): name: str = Field(...) """Name of the model, also used as identifier when saving/loading checkpoints and finding the correct model schema.""" - hidden_dim: int = Field(...) - """Hidden dimension of the model.""" kind: str = Field(...) """Kind of model to use, i.e. 
class path (tutorials.models.).""" position_projection: Literal["linear", "sincos"] = "sincos" diff --git a/src/noether/scaffold/template_files/schemas/models/transformer_config.py b/src/noether/scaffold/template_files/schemas/models/transformer_config.py index 0c966e49..dc0781da 100644 --- a/src/noether/scaffold/template_files/schemas/models/transformer_config.py +++ b/src/noether/scaffold/template_files/schemas/models/transformer_config.py @@ -2,10 +2,22 @@ from typing import Literal +from pydantic import computed_field + from noether.core.schemas.models import TransformerConfig +from noether.core.schemas.modules.layers.continuous_sincos_embedding import ContinuousSincosEmbeddingConfig +from noether.core.schemas.modules.layers.rope_frequency import RopeFrequencyConfig from .base_config import BaseModelConfig class TransformerConfig(BaseModelConfig, TransformerConfig): name: Literal["transformer"] = "transformer" + + @computed_field + def pos_encoding_config(self) -> ContinuousSincosEmbeddingConfig: + return ContinuousSincosEmbeddingConfig(hidden_dim=self.transformer_block_config.hidden_dim, input_dim=3) + + @computed_field + def rope_frequency_config(self) -> RopeFrequencyConfig: + return RopeFrequencyConfig(hidden_dim=self.hidden_dim // self.transformer_block_config.num_heads, input_dim=3) From f23360a45c2baac0a91f6ec044843f356bd26577 Mon Sep 17 00:00:00 2001 From: David Hauser Date: Wed, 11 Mar 2026 13:04:26 +0100 Subject: [PATCH 05/12] add tests for noether-init scaffolding --- tests/test_scaffold.py | 127 ------------------ tests/unit/noether/scaffold/__init__.py | 1 + tests/unit/noether/scaffold/test_cli.py | 78 +++++++++++ tests/unit/noether/scaffold/test_config.py | 87 ++++++++++++ tests/unit/noether/scaffold/test_generator.py | 70 ++++++++++ 5 files changed, 236 insertions(+), 127 deletions(-) delete mode 100644 tests/test_scaffold.py create mode 100644 tests/unit/noether/scaffold/__init__.py create mode 100644 tests/unit/noether/scaffold/test_cli.py 
create mode 100644 tests/unit/noether/scaffold/test_config.py create mode 100644 tests/unit/noether/scaffold/test_generator.py diff --git a/tests/test_scaffold.py b/tests/test_scaffold.py deleted file mode 100644 index 6285d9c8..00000000 --- a/tests/test_scaffold.py +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright © 2025 Emmi AI GmbH. All rights reserved. - -from __future__ import annotations - -import itertools -from pathlib import Path - -import pytest -import yaml - -from noether.scaffold.choices import DatasetChoice, HardwareChoice, ModelChoice, OptimizerChoice, TrackerChoice -from noether.scaffold.config import resolve_config -from noether.scaffold.generator import generate_project - -MODELS = list(ModelChoice) -# Generate a representative subset: each model with a shapenet and a caeml dataset -COMBOS = list(itertools.product(MODELS, [DatasetChoice.SHAPENET_CAR, DatasetChoice.AHMEDML])) - - -@pytest.mark.parametrize(("model", "dataset"), COMBOS, ids=[f"{m.value}-{d.value}" for m, d in COMBOS]) -def test_generate_project(tmp_path: Path, model: ModelChoice, dataset: DatasetChoice) -> None: - project_name = "test_proj" - proj = tmp_path / project_name - config = resolve_config( - project_name=project_name, - model=model, - dataset=dataset, - dataset_path="/tmp/fake_data", - optimizer=OptimizerChoice.ADAMW, - tracker=TrackerChoice.DISABLED, - hardware=HardwareChoice.GPU, - project_dir=proj, - wandb_entity=None, - ) - - generate_project(config) - - # All expected directories exist - assert (proj / "configs").is_dir() - assert (proj / "model").is_dir() - assert (proj / "schemas").is_dir() - - # No leftover tutorial references in generated .py or .yaml files - for ext in ("*.py", "*.yaml"): - for f in proj.rglob(ext): - content = f.read_text() - assert "tutorial." not in content, f"Found 'tutorial.' 
in {f.relative_to(proj)}" - - # All YAML files parse without error - for yf in proj.rglob("*.yaml"): - content = yf.read_text() - if not content.strip(): - continue - # Strip Hydra directives before parsing - lines = [line for line in content.splitlines() if not line.startswith("# @package")] - try: - yaml.safe_load("\n".join(lines)) - except yaml.YAMLError as e: - pytest.fail(f"YAML parse error in {yf.relative_to(proj)}: {e}") - - # All kind: values start with project name, a known framework prefix, or are Hydra interpolations - known_prefixes = (f"{project_name}.", "noether.", "torch.", "${") - for yf in proj.rglob("*.yaml"): - content = yf.read_text() - lines = content.splitlines() - for line in lines: - stripped = line.strip() - if stripped.startswith("kind:"): - kind_value = stripped.split(":", 1)[1].strip().strip("'\"") - if kind_value: - assert any(kind_value.startswith(p) for p in known_prefixes), ( - f"Unexpected kind '{kind_value}' in {yf.relative_to(proj)}" - ) - - # No unresolved placeholders - for ext in ("*.py", "*.yaml"): - for f in proj.rglob(ext): - content = f.read_text() - assert "__PROJECT__" not in content, f"Unresolved __PROJECT__ in {f.relative_to(proj)}" - assert "__CLASS__" not in content, f"Unresolved __CLASS__ in {f.relative_to(proj)}" - assert "__DATASET_PATH__" not in content, f"Unresolved __DATASET_PATH__ in {f.relative_to(proj)}" - assert "__OPTIMIZER__" not in content, f"Unresolved __OPTIMIZER__ in {f.relative_to(proj)}" - assert "__TRACKER__" not in content, f"Unresolved __TRACKER__ in {f.relative_to(proj)}" - - -def test_hardware_mps_sets_accelerator(tmp_path: Path) -> None: - """Non-default hardware should write accelerator to train.yaml.""" - config = resolve_config( - project_name="mps_test", - model=ModelChoice.UPT, - dataset=DatasetChoice.SHAPENET_CAR, - dataset_path="/tmp/fake", - optimizer=OptimizerChoice.ADAMW, - tracker=TrackerChoice.DISABLED, - hardware=HardwareChoice.MPS, - project_dir=tmp_path / "mps_test", - 
wandb_entity=None, - ) - - generate_project(config) - - train_yaml = tmp_path / "mps_test" / "configs" / "train.yaml" - lines = [line for line in train_yaml.read_text().splitlines() if not line.startswith("# @package")] - data = yaml.safe_load("\n".join(lines)) - assert data.get("accelerator") == "mps" - - -def test_gpu_default_no_accelerator(tmp_path: Path) -> None: - """Default GPU hardware should not write accelerator key.""" - config = resolve_config( - project_name="gpu_test", - model=ModelChoice.UPT, - dataset=DatasetChoice.SHAPENET_CAR, - dataset_path="/tmp/fake", - optimizer=OptimizerChoice.ADAMW, - tracker=TrackerChoice.DISABLED, - hardware=HardwareChoice.GPU, - project_dir=tmp_path / "gpu_test", - wandb_entity=None, - ) - - generate_project(config) - - train_yaml = tmp_path / "gpu_test" / "configs" / "train.yaml" - lines = [line for line in train_yaml.read_text().splitlines() if not line.startswith("# @package")] - data = yaml.safe_load("\n".join(lines)) - assert "accelerator" not in data diff --git a/tests/unit/noether/scaffold/__init__.py b/tests/unit/noether/scaffold/__init__.py new file mode 100644 index 00000000..aede2f25 --- /dev/null +++ b/tests/unit/noether/scaffold/__init__.py @@ -0,0 +1 @@ +# Copyright © 2026 Emmi AI GmbH. All rights reserved. diff --git a/tests/unit/noether/scaffold/test_cli.py b/tests/unit/noether/scaffold/test_cli.py new file mode 100644 index 00000000..3504c8e7 --- /dev/null +++ b/tests/unit/noether/scaffold/test_cli.py @@ -0,0 +1,78 @@ +# Copyright © 2025 Emmi AI GmbH. All rights reserved. 
+ +from __future__ import annotations + +from pathlib import Path + +import pytest +from typer.testing import CliRunner + +from noether.scaffold.cli import app + +runner = CliRunner() + + +@pytest.mark.parametrize("bad_name", ["123bad", "with-hyphen", "has space"], ids=["leading-digit", "hyphen", "space"]) +def test_invalid_project_name_rejected(tmp_path: Path, bad_name: str) -> None: + result = runner.invoke( + app, + [ + bad_name, + "--model", + "upt", + "--dataset", + "shapenet_car", + "--dataset-path", + "/tmp/x", + "--project-dir", + str(tmp_path), + ], + ) + assert result.exit_code == 1 + assert "not a valid Python identifier" in result.output + + +def test_existing_directory_rejected(tmp_path: Path) -> None: + project_dir = tmp_path / "existing_proj" + project_dir.mkdir() + result = runner.invoke( + app, + [ + "existing_proj", + "--model", + "upt", + "--dataset", + "shapenet_car", + "--dataset-path", + "/tmp/x", + "--project-dir", + str(tmp_path), + ], + ) + assert result.exit_code == 1 + assert "Directory already exists" in result.output + + +def test_valid_invocation_succeeds(tmp_path: Path) -> None: + result = runner.invoke( + app, + [ + "my_project", + "--model", + "upt", + "--dataset", + "shapenet_car", + "--dataset-path", + "/tmp/fake_data", + "--project-dir", + str(tmp_path), + ], + ) + assert result.exit_code == 0, result.output + assert (tmp_path / "my_project").is_dir() + assert (tmp_path / "my_project" / "callbacks").is_dir() + assert (tmp_path / "my_project" / "configs").is_dir() + assert (tmp_path / "my_project" / "model").is_dir() + assert (tmp_path / "my_project" / "pipeline").is_dir() + assert (tmp_path / "my_project" / "schemas").is_dir() + assert (tmp_path / "my_project" / "trainers").is_dir() diff --git a/tests/unit/noether/scaffold/test_config.py b/tests/unit/noether/scaffold/test_config.py new file mode 100644 index 00000000..14c554bc --- /dev/null +++ b/tests/unit/noether/scaffold/test_config.py @@ -0,0 +1,87 @@ +# Copyright © 2025 Emmi 
AI GmbH. All rights reserved. + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from noether.scaffold.choices import DatasetChoice, HardwareChoice, ModelChoice, OptimizerChoice, TrackerChoice +from noether.scaffold.config import ScaffoldConfig, load_reference, resolve_config, substitute + +# --------------------------------------------------------------------------- +# substitute() +# --------------------------------------------------------------------------- + + +def test_substitute_replaces_all_placeholders() -> None: + config = ScaffoldConfig( + project_name="my_proj", + model=ModelChoice.UPT, + dataset=DatasetChoice.SHAPENET_CAR, + dataset_path="/data/shapenet", + optimizer=OptimizerChoice.ADAMW, + tracker=TrackerChoice.WANDB, + hardware=HardwareChoice.GPU, + project_dir=Path("/tmp/my_proj"), + wandb_entity=None, + ) + template = ( + "kind: __PROJECT__.model.UPT\ndataset_root: __DATASET_PATH__\noptimizer: __OPTIMIZER__\ntracker: __TRACKER__" + ) + result = substitute(template, config) + + assert result == "kind: my_proj.model.UPT\ndataset_root: /data/shapenet\noptimizer: adamw\ntracker: wandb" + + +# --------------------------------------------------------------------------- +# load_reference() +# --------------------------------------------------------------------------- + +REFERENCE_KEYS = { + "experiment_category", + "data_specs_file", + "normalizers_file", + "statistics_file", + "pipeline_file", + "dataset_config_file", + "trainer_config_file", + "callbacks_file", +} + + +@pytest.mark.parametrize("dataset", list(DatasetChoice), ids=[d.value for d in DatasetChoice]) +def test_load_reference_returns_expected_keys(dataset: DatasetChoice) -> None: + ref = load_reference(dataset) + assert isinstance(ref, dict) + for key in REFERENCE_KEYS: + assert key in ref, f"Missing key '{key}' in reference for {dataset.value}" + + +# --------------------------------------------------------------------------- +# resolve_config() +# 
--------------------------------------------------------------------------- + + +def test_resolve_config_populates_reference(tmp_path: Path) -> None: + config = resolve_config( + project_name="test_proj", + model=ModelChoice.UPT, + dataset=DatasetChoice.SHAPENET_CAR, + dataset_path="/tmp/data", + optimizer=OptimizerChoice.ADAMW, + tracker=TrackerChoice.DISABLED, + hardware=HardwareChoice.GPU, + project_dir=tmp_path / "test_proj", + wandb_entity=None, + ) + assert config.project_name == "test_proj" + assert config.model == ModelChoice.UPT + assert config.dataset == DatasetChoice.SHAPENET_CAR + assert config.dataset_path == "/tmp/data" + assert config.optimizer == OptimizerChoice.ADAMW + assert config.tracker == TrackerChoice.DISABLED + assert config.hardware == HardwareChoice.GPU + assert config.project_dir == tmp_path / "test_proj" + assert config.wandb_entity is None + assert REFERENCE_KEYS == config.reference.keys() diff --git a/tests/unit/noether/scaffold/test_generator.py b/tests/unit/noether/scaffold/test_generator.py new file mode 100644 index 00000000..a5a0be50 --- /dev/null +++ b/tests/unit/noether/scaffold/test_generator.py @@ -0,0 +1,70 @@ +# Copyright © 2025 Emmi AI GmbH. All rights reserved. 
+ +from __future__ import annotations + +import itertools +from pathlib import Path + +import pytest +import yaml + +from noether.scaffold.choices import DatasetChoice, HardwareChoice, ModelChoice, OptimizerChoice, TrackerChoice +from noether.scaffold.config import resolve_config +from noether.scaffold.generator import generate_project + +MODELS = list(ModelChoice) +DATASETS = list(DatasetChoice) +COMBOS = list(itertools.product(MODELS, DATASETS)) + + +def _generate(tmp_path: Path, **overrides): + """Helper to generate a project with sensible defaults, accepting overrides.""" + defaults = dict( + project_name="test_proj", + model=ModelChoice.UPT, + dataset=DatasetChoice.SHAPENET_CAR, + dataset_path="/tmp/fake_data", + optimizer=OptimizerChoice.ADAMW, + tracker=TrackerChoice.DISABLED, + hardware=HardwareChoice.GPU, + wandb_entity=None, + ) + defaults.update(overrides) + name = defaults["project_name"] + proj = tmp_path / name + config = resolve_config(**defaults, project_dir=proj) + generate_project(config) + return proj + + +# --------------------------------------------------------------------------- +# Parametrized end-to-end generation +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize(("model", "dataset"), COMBOS, ids=[f"{m.value}-{d.value}" for m, d in COMBOS]) +def test_generate_project(tmp_path: Path, model: ModelChoice, dataset: DatasetChoice) -> None: + proj = _generate(tmp_path, model=model, dataset=dataset) + + # Expected directories exist + assert (proj / "callbacks").is_dir() + assert (proj / "configs").is_dir() + assert (proj / "model").is_dir() + assert (proj / "pipeline").is_dir() + assert (proj / "schemas").is_dir() + assert (proj / "trainers").is_dir() + + # All YAML files parse without error + for yf in proj.rglob("*.yaml"): + content = yf.read_text() + lines = [ + line for line in content.splitlines() if not line.startswith("# @package") + ] # remove Hydra directives to avoid YAML parsing 
issues + yaml.safe_load("\n".join(lines)) + + # No unresolved placeholders + for ext in ("*.py", "*.yaml"): + for f in proj.rglob(ext): + content = f.read_text() + for placeholder in ("__PROJECT__", "__CLASS__", "__DATASET_PATH__", "__OPTIMIZER__", "__TRACKER__"): + assert placeholder not in content, f"Unresolved {placeholder} in {f.relative_to(proj)}" From 9b94346bed8681e4441873cb78b7a2fd48669eab Mon Sep 17 00:00:00 2001 From: David Hauser Date: Wed, 11 Mar 2026 14:22:30 +0100 Subject: [PATCH 06/12] add noether-init documentation --- README.md | 25 +++- boilerplate_project/README.MD | 3 + docs/source/conf.py | 1 + docs/source/guides/working_with_cli.rst | 29 +++++ docs/source/index.rst | 3 +- docs/source/tutorials/index.rst | 1 + .../tutorials/scaffolding_a_new_project.rst | 109 ++++++++++++++++++ 7 files changed, 169 insertions(+), 2 deletions(-) create mode 100644 docs/source/tutorials/scaffolding_a_new_project.rst diff --git a/README.md b/README.md index 77ea7f23..d0b352bf 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,30 @@ You might be in a situation when your venv won't be configured as intended anymo --- # Quickstart -You can run a training job immediately using the [tutorial](./tutorial/README.MD) configuration. For local development (Mac/CPU), use: +> [!IMPORTANT] +> Before training, you need a prepared dataset. To get started with the ShapeNet-Car dataset, +> follow the download and preprocessing steps in the +> [ShapeNet-Car dataset README](./src/noether/data/datasets/cfd/shapenet_car/README.MD). 
+ +## Scaffold a New Project + +Use `noether-init` to generate a complete training project: + +```console +noether-init my_project --model upt --dataset shapenet_car --dataset-path /path/to/shapenet_car +``` + +Then train with: + +```console +uv run noether-train --config-dir my_project/configs --config-name train +experiment=upt +``` + +See the [scaffolding tutorial](https://noether-docs.emmi.ai/tutorials/scaffolding_a_new_project.html) for all options and the generated project structure. + +## Run the Tutorial Example + +You can also run a training job immediately using the [tutorial](./tutorial/README.MD) configuration. For local development (Mac/CPU), use: ```console uv run noether-train --hp tutorial/configs/train_shapenet.yaml \ diff --git a/boilerplate_project/README.MD b/boilerplate_project/README.MD index 2e4fd806..bd087959 100644 --- a/boilerplate_project/README.MD +++ b/boilerplate_project/README.MD @@ -1,5 +1,8 @@ ## `Noether` Starter Kit Project ---- + +> You can use `noether-init` to automatically scaffold a complete project with your choice of model, dataset, and configuration. See the [scaffolding tutorial](https://noether-docs.emmi.ai/tutorials/scaffolding_a_new_project.html) for details. + This folder contains skeleton/boilerplate code for a minimal working `Noether` training pipeline, including all required components. 1. A dataset that loads (and generates) dummy data. 
diff --git a/docs/source/conf.py b/docs/source/conf.py index 4971067c..b6dcec3f 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -183,6 +183,7 @@ "**/*.ipynb", "**/*.md", "**/.venv/**", + "**/scaffold/template_files/**", ] diff --git a/docs/source/guides/working_with_cli.rst b/docs/source/guides/working_with_cli.rst index 24cd507a..c98e6613 100644 --- a/docs/source/guides/working_with_cli.rst +++ b/docs/source/guides/working_with_cli.rst @@ -65,3 +65,32 @@ Verify your setup by running the ``estimate`` command, which fetches metadata an noether-data aws estimate noaa-goes16 ABI-L1b-RadC/2023/001/00/ If you see no errors — congratulations, your setup works! + +Scaffolding a New Project +------------------------- + +The ``noether-init`` command generates a complete Noether training project with all required modules and configurations. + +.. code-block:: bash + + noether-init my_project \ + --model upt \ + --dataset shapenet_car \ + --dataset-path /path/to/shapenet_car + +**Required arguments:** + +- ``project_name`` (positional) — project name, must be a valid Python identifier (no hyphens) +- ``--model, -m`` — model architecture (``transformer``, ``upt``, ``ab_upt``, ``transolver``) +- ``--dataset, -d`` — dataset (``shapenet_car``, ``drivaernet``, ``drivaerml``, ``ahmedml``, ``emmi_wing``) +- ``--dataset-path`` — path to dataset on disk + +**Optional arguments:** + +- ``--optimizer, -o`` — optimizer, default: ``adamw`` (also: ``lion``) +- ``--tracker, -t`` — experiment tracker, default: ``disabled`` (also: ``wandb``, ``trackio``, ``tensorboard``) +- ``--hardware`` — hardware target, default: ``gpu`` (also: ``mps``, ``cpu``) +- ``--project-dir, -l`` — parent directory for the project folder, default: current directory +- ``--wandb-entity`` — W&B entity name (only used with ``--tracker wandb``) + +For a detailed walkthrough and the generated project structure, see :doc:`/tutorials/scaffolding_a_new_project`. 
diff --git a/docs/source/index.rst b/docs/source/index.rst index 9057a233..50cee50b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -37,8 +37,9 @@ Welcome to the Noether Framework documentation. Here you will find available API tutorials/training_first_model_with_code tutorials/full_code_tutorial tutorials/how_to_initialize - + Walkthrough + tutorials/scaffolding_a_new_project .. toctree:: diff --git a/docs/source/tutorials/index.rst b/docs/source/tutorials/index.rst index 45ed34e9..00f3b330 100644 --- a/docs/source/tutorials/index.rst +++ b/docs/source/tutorials/index.rst @@ -8,3 +8,4 @@ Step-by-step instructions to get you up and running with Noether. * :doc:`training_first_model_with_configs`: Learn how to train models by simply editing configuration files. * :doc:`training_first_model_with_code`: Understand how to use Noether as a library to build custom training scripts. * `Walkthrough `_: A hands-on guide through the repository's tutorial examples. +* :doc:`scaffolding_a_new_project`: Use ``noether-init`` to generate a complete training project from scratch. diff --git a/docs/source/tutorials/scaffolding_a_new_project.rst b/docs/source/tutorials/scaffolding_a_new_project.rst new file mode 100644 index 00000000..3ec54fe1 --- /dev/null +++ b/docs/source/tutorials/scaffolding_a_new_project.rst @@ -0,0 +1,109 @@ +Scaffolding a New Project +========================= + +The ``noether-init`` command generates a complete, ready-to-train Noether project for +models and datasets supported out of the box by the framework. It creates all required Python modules, Hydra configuration +files, schemas, data pipelines, trainers, and callbacks, giving you a working starting point that you +can adapt to your own use case. + +Prerequisites +------------- + +Before scaffolding, download and preprocess the dataset you want to use. 
Each dataset has its own +fetching and preprocessing instructions — see the +`Dataset Zoo README `_ +for an overview and links to dataset-specific guides. + +Example Usage +------------- + +.. code-block:: bash + + noether-init my_project \ + --model upt \ + --dataset shapenet_car \ + --dataset-path /path/to/shapenet_car + +This creates a ``my_project/`` directory in the current working directory with a UPT model and the ``shapenet_car`` dataset. +After completion, ``noether-init`` prints a summary of the configuration and the corresponding +``noether-train`` command to start training. + +Arguments +--------- + +.. list-table:: + :header-rows: 1 + :widths: 25 50 25 + + * - Option + - Values + - Default + * - ``project_name`` *(required)* + - Positional argument. Must be a valid Python identifier (no hyphens). + - + * - ``--model, -m`` *(required)* + - ``transformer``, ``upt``, ``ab_upt``, ``transolver`` + - + * - ``--dataset, -d`` *(required)* + - ``shapenet_car``, ``drivaernet``, ``drivaerml``, ``ahmedml``, ``emmi_wing`` + - + * - ``--dataset-path`` *(required)* + - Path to the dataset on disk + - + * - ``--optimizer, -o`` + - ``adamw``, ``lion`` + - ``adamw`` + * - ``--tracker, -t`` + - ``wandb``, ``trackio``, ``tensorboard``, ``disabled`` + - ``disabled`` + * - ``--hardware`` + - ``gpu``, ``mps``, ``cpu`` + - ``gpu`` + * - ``--project-dir, -l`` + - Parent directory for the project folder + - current directory + * - ``--wandb-entity`` + - W&B entity name (only with ``--tracker wandb``) + - your W&B username + +Generated Project Structure +--------------------------- + +The generated project contains: + +.. 
code-block:: text + + my_project/ + ├── configs/ + │ ├── callbacks/ # Training callback configs + │ ├── data_specs/ # Data specification configs + │ ├── dataset_normalizers/ + │ ├── dataset_statistics/ + │ ├── datasets/ # Dataset configs + │ ├── experiment/ # Experiment config for the selected model + │ ├── model/ # Model architecture config + │ ├── optimizer/ # Optimizer config + │ ├── pipeline/ # Data pipeline config + │ ├── tracker/ # Experiment tracker config + │ ├── trainer/ # Trainer config + │ └── train.yaml # Main training config + ├── model/ # Model implementation + ├── schemas/ # Configuration dataclasses + ├── pipeline/ # Data processing (multistage pipelines, sample processors) + ├── trainers/ # Training loop implementation + └── callbacks/ # Training callbacks + +All Python files are wired up with correct imports for your chosen model, and all Hydra configs reference +your dataset path, optimizer, and tracker selections. + +Running Training +---------------- + +After scaffolding, start training with: + +..
code-block:: bash + + uv run noether-train \ + --config-dir my_project/configs \ + --config-name train \ + +experiment=upt From a14adb87fbc984e99ca7288857e09a0aa3d0685a Mon Sep 17 00:00:00 2001 From: David Hauser Date: Wed, 11 Mar 2026 14:28:12 +0100 Subject: [PATCH 07/12] fix: only copy model specific experiment YAML --- src/noether/scaffold/file_manager.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/noether/scaffold/file_manager.py b/src/noether/scaffold/file_manager.py index 401ff590..8020a707 100644 --- a/src/noether/scaffold/file_manager.py +++ b/src/noether/scaffold/file_manager.py @@ -5,7 +5,7 @@ from importlib.resources.abc import Traversable from pathlib import Path -from .choices import HardwareChoice, ModelChoice +from .choices import HardwareChoice from .config import TEMPLATES, ScaffoldConfig, substitute @@ -216,11 +216,10 @@ def copy_yaml_configs(config: ScaffoldConfig) -> None: content = train_path.read_text() train_path.write_text(content + f"accelerator: {config.hardware.value}\n") - # --- Experiment configs (all 4 models for the dataset's category) --- + # --- Experiment config (selected model only) --- category = ref.get("experiment_category", "shapenet") - for model in ModelChoice: - _copy( - tpl / "experiment" / category / f"{model.value}.yaml", - dst / "experiment" / f"{model.value}.yaml", - config, - ) + _copy( + tpl / "experiment" / category / f"{config.model.value}.yaml", + dst / "experiment" / f"{config.model.value}.yaml", + config, + ) From 5414ab4a7f3d2a36c8302688c204dd50f9f9d357 Mon Sep 17 00:00:00 2001 From: David Hauser Date: Wed, 11 Mar 2026 14:48:39 +0100 Subject: [PATCH 08/12] chore: remove unnecessary comments --- tests/unit/noether/scaffold/test_config.py | 13 ------------- tests/unit/noether/scaffold/test_generator.py | 5 ----- 2 files changed, 18 deletions(-) diff --git a/tests/unit/noether/scaffold/test_config.py b/tests/unit/noether/scaffold/test_config.py index 14c554bc..ec0bf7eb 
100644 --- a/tests/unit/noether/scaffold/test_config.py +++ b/tests/unit/noether/scaffold/test_config.py @@ -9,10 +9,6 @@ from noether.scaffold.choices import DatasetChoice, HardwareChoice, ModelChoice, OptimizerChoice, TrackerChoice from noether.scaffold.config import ScaffoldConfig, load_reference, resolve_config, substitute -# --------------------------------------------------------------------------- -# substitute() -# --------------------------------------------------------------------------- - def test_substitute_replaces_all_placeholders() -> None: config = ScaffoldConfig( @@ -34,10 +30,6 @@ def test_substitute_replaces_all_placeholders() -> None: assert result == "kind: my_proj.model.UPT\ndataset_root: /data/shapenet\noptimizer: adamw\ntracker: wandb" -# --------------------------------------------------------------------------- -# load_reference() -# --------------------------------------------------------------------------- - REFERENCE_KEYS = { "experiment_category", "data_specs_file", @@ -58,11 +50,6 @@ def test_load_reference_returns_expected_keys(dataset: DatasetChoice) -> None: assert key in ref, f"Missing key '{key}' in reference for {dataset.value}" -# --------------------------------------------------------------------------- -# resolve_config() -# --------------------------------------------------------------------------- - - def test_resolve_config_populates_reference(tmp_path: Path) -> None: config = resolve_config( project_name="test_proj", diff --git a/tests/unit/noether/scaffold/test_generator.py b/tests/unit/noether/scaffold/test_generator.py index a5a0be50..b80f20db 100644 --- a/tests/unit/noether/scaffold/test_generator.py +++ b/tests/unit/noether/scaffold/test_generator.py @@ -37,11 +37,6 @@ def _generate(tmp_path: Path, **overrides): return proj -# --------------------------------------------------------------------------- -# Parametrized end-to-end generation -# --------------------------------------------------------------------------- 
- - @pytest.mark.parametrize(("model", "dataset"), COMBOS, ids=[f"{m.value}-{d.value}" for m, d in COMBOS]) def test_generate_project(tmp_path: Path, model: ModelChoice, dataset: DatasetChoice) -> None: proj = _generate(tmp_path, model=model, dataset=dataset) From 5978a571f59cbcb73035688e1336a4753494c331 Mon Sep 17 00:00:00 2001 From: David Hauser Date: Thu, 12 Mar 2026 11:07:21 +0100 Subject: [PATCH 09/12] chore: update noether-init commands in docs to use uv run --- README.md | 2 +- docs/source/guides/working_with_cli.rst | 2 +- docs/source/tutorials/scaffolding_a_new_project.rst | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d0b352bf..bb5ec199 100644 --- a/README.md +++ b/README.md @@ -133,7 +133,7 @@ You might be in a situation when your venv won't be configured as intended anymo Use `noether-init` to generate a complete training project: ```console -noether-init my_project --model upt --dataset shapenet_car --dataset-path /path/to/shapenet_car +uv run noether-init my_project --model upt --dataset shapenet_car --dataset-path /path/to/shapenet_car ``` Then train with: diff --git a/docs/source/guides/working_with_cli.rst b/docs/source/guides/working_with_cli.rst index c98e6613..62878529 100644 --- a/docs/source/guides/working_with_cli.rst +++ b/docs/source/guides/working_with_cli.rst @@ -73,7 +73,7 @@ The ``noether-init`` command generates a complete Noether training project with .. code-block:: bash - noether-init my_project \ + uv run noether-init my_project \ --model upt \ --dataset shapenet_car \ --dataset-path /path/to/shapenet_car diff --git a/docs/source/tutorials/scaffolding_a_new_project.rst b/docs/source/tutorials/scaffolding_a_new_project.rst index 3ec54fe1..deb5e23f 100644 --- a/docs/source/tutorials/scaffolding_a_new_project.rst +++ b/docs/source/tutorials/scaffolding_a_new_project.rst @@ -19,7 +19,7 @@ Example Usage .. 
code-block:: bash - noether-init my_project \ + uv run noether-init my_project \ --model upt \ --dataset shapenet_car \ --dataset-path /path/to/shapenet_car From 7482ed4fcf32140ac47395191594351f199f50c4 Mon Sep 17 00:00:00 2001 From: David Hauser Date: Thu, 12 Mar 2026 13:10:05 +0100 Subject: [PATCH 10/12] update scaffolding section in how to work with cli docs --- docs/source/guides/working_with_cli.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/source/guides/working_with_cli.rst b/docs/source/guides/working_with_cli.rst index 62878529..64e80e90 100644 --- a/docs/source/guides/working_with_cli.rst +++ b/docs/source/guides/working_with_cli.rst @@ -80,17 +80,17 @@ The ``noether-init`` command generates a complete Noether training project with **Required arguments:** -- ``project_name`` (positional) — project name, must be a valid Python identifier (no hyphens) -- ``--model, -m`` — model architecture (``transformer``, ``upt``, ``ab_upt``, ``transolver``) -- ``--dataset, -d`` — dataset (``shapenet_car``, ``drivaernet``, ``drivaerml``, ``ahmedml``, ``emmi_wing``) +- ``project_name`` (positional) — project name, e.g. ``my_project`` +- ``--model, -m`` — model architecture, e.g. ``ab_upt`` +- ``--dataset, -d`` — dataset, e.g. ``shapenet_car`` - ``--dataset-path`` — path to dataset on disk **Optional arguments:** -- ``--optimizer, -o`` — optimizer, default: ``adamw`` (also: ``lion``) -- ``--tracker, -t`` — experiment tracker, default: ``disabled`` (also: ``wandb``, ``trackio``, ``tensorboard``) -- ``--hardware`` — hardware target, default: ``gpu`` (also: ``mps``, ``cpu``) -- ``--project-dir, -l`` — parent directory for the project folder, default: current directory +- ``--optimizer, -o`` — optimizer, e.g. ``adamw`` (default) +- ``--tracker, -t`` — experiment tracker, e.g. ``wandb`` +- ``--hardware`` — hardware target, e.g. 
``gpu`` (default) +- ``--project-dir, -l`` — parent directory for the project folder - ``--wandb-entity`` — W&B entity name (only used with ``--tracker wandb``) -For a detailed walkthrough and the generated project structure, see :doc:`/tutorials/scaffolding_a_new_project`. +For all available options, see :doc:`/tutorials/scaffolding_a_new_project`. \ No newline at end of file From d20ab4a15694f8d311f44c83b3db3cdbaada5162 Mon Sep 17 00:00:00 2001 From: David Hauser Date: Thu, 12 Mar 2026 14:08:58 +0100 Subject: [PATCH 11/12] remove comment --- .../configs/callbacks/training_callbacks_caeml.yaml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/noether/scaffold/template_files/configs/callbacks/training_callbacks_caeml.yaml b/src/noether/scaffold/template_files/configs/callbacks/training_callbacks_caeml.yaml index 941127c4..ecf3d4be 100644 --- a/src/noether/scaffold/template_files/configs/callbacks/training_callbacks_caeml.yaml +++ b/src/noether/scaffold/template_files/configs/callbacks/training_callbacks_caeml.yaml @@ -38,8 +38,4 @@ save_latest_weights: true target_factors: - 0.9999 - name: EmaCallback - # example of how to save/load only specific submodules of a composite model - # model_paths: - # - low_level_blocks - # - high_level_blocks + name: EmaCallback \ No newline at end of file From 2c059e33a6303c55d6f8aba648adacf2e6bcf57b Mon Sep 17 00:00:00 2001 From: David Hauser Date: Thu, 12 Mar 2026 14:30:57 +0100 Subject: [PATCH 12/12] remove SparseTensorOffsetCollator from templates --- src/noether/scaffold/file_manager.py | 10 ------ .../pipeline/collators/__init__.py | 3 -- .../collators/sparse_tensor_offset.py | 35 ------------------- 3 files changed, 48 deletions(-) delete mode 100644 src/noether/scaffold/template_files/pipeline/collators/__init__.py delete mode 100644 src/noether/scaffold/template_files/pipeline/collators/sparse_tensor_offset.py diff --git a/src/noether/scaffold/file_manager.py b/src/noether/scaffold/file_manager.py 
index 8020a707..fa92267a 100644 --- a/src/noether/scaffold/file_manager.py +++ b/src/noether/scaffold/file_manager.py @@ -61,16 +61,6 @@ def copy_python_files(config: ScaffoldConfig) -> None: # --- Infrastructure files (with __PROJECT__ substitution) --- _copy(tpl / "pipeline" / "__init__.py", project_dir / "pipeline" / "__init__.py", config) - _copy( - tpl / "pipeline" / "collators" / "__init__.py", - project_dir / "pipeline" / "collators" / "__init__.py", - config, - ) - _copy( - tpl / "pipeline" / "collators" / "sparse_tensor_offset.py", - project_dir / "pipeline" / "collators" / "sparse_tensor_offset.py", - config, - ) _copy( tpl / "pipeline" / "multistage_pipelines" / "__init__.py", project_dir / "pipeline" / "multistage_pipelines" / "__init__.py", diff --git a/src/noether/scaffold/template_files/pipeline/collators/__init__.py b/src/noether/scaffold/template_files/pipeline/collators/__init__.py deleted file mode 100644 index 8d0c982a..00000000 --- a/src/noether/scaffold/template_files/pipeline/collators/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright © 2025 Emmi AI GmbH. All rights reserved. - -from .sparse_tensor_offset import SparseTensorOffsetCollator diff --git a/src/noether/scaffold/template_files/pipeline/collators/sparse_tensor_offset.py b/src/noether/scaffold/template_files/pipeline/collators/sparse_tensor_offset.py deleted file mode 100644 index b0a97744..00000000 --- a/src/noether/scaffold/template_files/pipeline/collators/sparse_tensor_offset.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright © 2025 Emmi AI GmbH. All rights reserved. - -from copy import deepcopy - -import torch - -from noether.data.pipeline.collator import Collator - - -class SparseTensorOffsetCollator(Collator): - """Collates sparse tensors by concatenating them along the first axis and creating an offset tensor that maps - each sample to its respective index in the batch. 
- """ - - def __init__(self, item: str, offset_key: str): - self.item = item - self.offset_key = offset_key - - def __call__(self, samples: list[dict[str, torch.Tensor]]) -> dict[str, torch.Tensor]: - """Concatenates sparse tensors along the first axis and creates an offset tensor. - - Args: - samples: List of individual samples retrieved from the dataset. - - """ - offset = 0 - samples = [deepcopy(sample) for sample in samples] # copy to avoid changing method input - batch: dict[str, torch.Tensor] = {} - for sample in samples: - cur_num_points = len(sample[self.offset_key]) - sample[self.item] = sample[self.item] + offset - offset += cur_num_points - - batch[self.item] = torch.concat([sample[self.item] for sample in samples]) - return batch