From 05fdcbed6ce6f6b9151e4911ebf72c44ab838cab Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 1 Dec 2025 12:06:56 -0800
Subject: [PATCH 01/32] Add typhon model arch.

---
 .../experimental/models/typhon/__init__.py    |  19 +
 .../experimental/models/typhon/typhon.py      | 821 ++++++++++++++++++
 2 files changed, 840 insertions(+)
 create mode 100644 physicsnemo/experimental/models/typhon/__init__.py
 create mode 100644 physicsnemo/experimental/models/typhon/typhon.py

diff --git a/physicsnemo/experimental/models/typhon/__init__.py b/physicsnemo/experimental/models/typhon/__init__.py
new file mode 100644
index 0000000000..430d0e93d2
--- /dev/null
+++ b/physicsnemo/experimental/models/typhon/__init__.py
@@ -0,0 +1,19 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .typhon import Typhon
+
+__all__ = ["Typhon"]
diff --git a/physicsnemo/experimental/models/typhon/typhon.py b/physicsnemo/experimental/models/typhon/typhon.py
new file mode 100644
index 0000000000..4351a17e26
--- /dev/null
+++ b/physicsnemo/experimental/models/typhon/typhon.py
@@ -0,0 +1,821 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dataclasses import dataclass
+
+import torch
+import torch.nn as nn
+from einops import rearrange
+
+import physicsnemo  # noqa: F401 for docs
+from physicsnemo.utils.version_check import check_min_version
+from physicsnemo.models.transolver.Physics_Attention import (
+    PhysicsAttentionIrregularMesh,
+    gumbel_softmax,
+)
+from physicsnemo.models.transolver.transolver import MLP
+
+from physicsnemo.models.meta import ModelMetaData
+from physicsnemo.models.module import Module
+
+# Check optional dependency availability
+TE_AVAILABLE = check_min_version("transformer-engine", "0.1.0", hard_fail=False)
+if TE_AVAILABLE:
+    import transformer_engine.pytorch as te
+
+ACTIVATION = {
+    "gelu": nn.GELU,
+    "tanh": nn.Tanh,
+    "sigmoid": nn.Sigmoid,
+    "relu": nn.ReLU,
+    "leaky_relu": nn.LeakyReLU(0.1),  # NOTE(review): instance, not a class
+    "softplus": nn.Softplus,
+    "ELU": nn.ELU,  # NOTE(review): only upper-case key in this table
+    "silu": nn.SiLU,
+}
+
+
+class GALE(PhysicsAttentionIrregularMesh):
+    r"""Geometry-Aware Latent Embeddings (GALE) attention layer.
+
+    This is an extension of the Transolver PhysicsAttention mechanism to support
+    cross-attention with a context vector, built from geometry and global embeddings.
+    GALE combines self-attention on learned physical state slices with cross-attention
+    to geometry-aware context, using a learnable mixing weight to blend the two.
+
+    Parameters
+    ----------
+    dim : int
+        Input dimension of the features.
+    heads : int, optional
+        Number of attention heads. Default is 8.
+    dim_head : int, optional
+        Dimension of each attention head. Default is 64.
+    dropout : float, optional
+        Dropout rate. Default is 0.0.
+    slice_num : int, optional
+        Number of learned physical state slices. Default is 64.
+    use_te : bool, optional
+        Whether to use Transformer Engine backend when available. Default is True.
+    plus : bool, optional
+        Whether to use Transolver++ features. Default is False.
+    context_dim : int, optional
+        Dimension of the context vector for cross-attention. Default is 0.
+
+    Notes
+    -----
+    The mixing between self-attention and cross-attention is controlled by a learnable
+    parameter ``state_mixing`` which is passed through a sigmoid function to ensure
+    the mixing weight stays in \([0, 1]\).
+
+    See Also
+    --------
+    :class:`physicsnemo.models.transolver.Physics_Attention.PhysicsAttentionIrregularMesh` : Base physics attention class.
+    :class:`GALE_block` : Transformer block using GALE attention.
+    """
+
+    def __init__(
+        self,
+        dim,
+        heads: int = 8,
+        dim_head: int = 64,
+        dropout: float = 0.0,
+        slice_num: int = 64,
+        use_te: bool = True,
+        plus: bool = False,
+        context_dim: int = 0,
+    ):
+        super().__init__(dim, heads, dim_head, dropout, slice_num, use_te, plus)
+
+        linear_layer = te.Linear if self.use_te else nn.Linear
+
+        # Extra cross-attention projections (q from slice tokens, k/v from context):
+        self.cross_q = linear_layer(dim_head, dim_head)
+        self.cross_k = linear_layer(context_dim, dim_head)
+        self.cross_v = linear_layer(context_dim, dim_head)
+
+        # This is the learnable mixing weight between self and cross attention.
+        # It is initialized to 0.0 and passed through a sigmoid in forward(), so
+        # the initial mixing weight is sigmoid(0) = 0.5 (an even blend).
+        self.state_mixing = nn.Parameter(torch.tensor(0.0))
+
+    def compute_slice_attention_cross(
+        self, slice_tokens: torch.Tensor, context: torch.Tensor
+    ) -> torch.Tensor:
+        r"""Compute cross-attention between slice tokens and context.
+
+        Parameters
+        ----------
+        slice_tokens : torch.Tensor
+            Slice tokens of shape \((B, H, N, D)\) where \(B\) is batch size, \(H\) is number of heads, \(N\) is number of slices, and \(D\) is head dimension.
+        context : torch.Tensor
+            Context tensor of shape \((B, H, N_c, D_c)\) where \(N_c\) is number of context slices and \(D_c\) is context dimension.
+
+        Returns
+        -------
+        torch.Tensor
+            Cross-attention output of shape \((B, H, N, D)\).
+        """
+
+        # Project the slice and context tokens:
+        q = self.cross_q(slice_tokens)
+        k = self.cross_k(context)
+        v = self.cross_v(context)
+
+        # Compute the attention:
+        if self.use_te:
+            cross_attention = self.attn_fn(q, k, v)
+        else:
+            cross_attention = torch.nn.functional.scaled_dot_product_attention(
+                q, k, v, is_causal=False
+            )
+
+        return cross_attention
+
+    def forward(
+        self, x: torch.Tensor, context: torch.Tensor | None = None
+    ) -> torch.Tensor:
+        r"""Forward pass of the GALE module.
+
+        Parameters
+        ----------
+        x : torch.Tensor
+            Input tensor of shape \((B, N, C)\) where \(B\) is batch size, \(N\) is number of tokens, and \(C\) is number of channels.
+        context : torch.Tensor, optional
+            Context tensor for cross-attention of shape \((B, H, S_c, D_c)\) where \(H\) is number of heads, \(S_c\) is number of context slices, and \(D_c\) is context dimension. If None, only self-attention is applied. Default is None.
+
+        Returns
+        -------
+        torch.Tensor
+            Output tensor of shape \((B, N, C)\), same shape as input.
+        """
+
+        # Project the inputs onto learned spaces:
+        if self.plus:
+            x_mid = self.project_input_onto_slices(x)
+            # In Transolver++ there is no separate fx projection;
+            # x_mid is reused as the latent features to aggregate:
+            fx_mid = x_mid
+        else:
+            x_mid, fx_mid = self.project_input_onto_slices(x)
+
+        # Perform the linear projection of learned latent space onto slices:
+        slice_projections = self.in_project_slice(x_mid)
+
+        # NOTE(review): the last axis is presumably Slice_num, not head_dim - confirm in base class
+
+        # Use the slice projections and learned spaces to compute the slices, and their weights:
+        slice_weights, slice_tokens = self.compute_slices_from_projections(
+            slice_projections, fx_mid
+        )
+        # slice_weights has shape [Batch, N_heads, N_tokens, Slice_num]
+        # slice_tokens presumably has shape [Batch, N_heads, Slice_num, head_dim] - TODO confirm
+
+        # Apply self-attention among the slice tokens, using the selected backend:
+        if self.use_te:
+            self_slice_token = self.compute_slice_attention_te(slice_tokens)
+        else:
+            self_slice_token = self.compute_slice_attention_sdpa(slice_tokens)
+
+        # Additionally cross-attend from the slice tokens to the context, if given:
+        if context is not None:
+            cross_slice_token = self.compute_slice_attention_cross(
+                slice_tokens, context
+            )
+
+            # Blend: w = sigmoid(state_mixing) on self-attn, (1 - w) on cross-attn:
+            mixing_weight = torch.sigmoid(self.state_mixing)
+            out_slice_token = (
+                mixing_weight * self_slice_token
+                + (1 - mixing_weight) * cross_slice_token
+            )
+
+        else:
+            # Just keep self attention:
+            out_slice_token = self_slice_token
+
+        # Shape unchanged
+
+        # Deslice:
+        outputs = self.project_attention_outputs(out_slice_token, slice_weights)
+
+        # Outputs now has the same shape as the original input x
+
+        return outputs
+
+
+class GALE_block(nn.Module):
+    r"""Transformer encoder block using GALE attention.
+
+    This block replaces standard self-attention with the GALE (Geometry-Aware Latent
+    Embeddings) attention mechanism, which combines physics-aware self-attention with
+    cross-attention to geometry and global context.
+
+    Parameters
+    ----------
+    num_heads : int
+        Number of attention heads.
+    hidden_dim : int
+        Hidden dimension of the transformer.
+    dropout : float
+        Dropout rate.
+    act : str, optional
+        Activation function name. Default is "gelu".
+    mlp_ratio : int, optional
+        Ratio of MLP hidden dimension to ``hidden_dim``. Default is 4.
+    last_layer : bool, optional
+        Whether this is the last layer in the model. Default is False.
+    out_dim : int, optional
+        Output dimension (only used if ``last_layer=True``). Default is 1.
+    slice_num : int, optional
+        Number of learned physical state slices. Default is 32.
+    use_te : bool, optional
+        Whether to use Transformer Engine backend. Default is True.
+    plus : bool, optional
+        Whether to use Transolver++ features. Default is False.
+    context_dim : int, optional
+        Dimension of the context vector for cross-attention. Default is 0.
+
+    Notes
+    -----
+    The block applies layer normalization before the attention operation and uses
+    residual connections after both the attention and MLP layers.
+    """
+
+    def __init__(
+        self,
+        num_heads: int,
+        hidden_dim: int,
+        dropout: float,
+        act="gelu",
+        mlp_ratio=4,
+        last_layer=False,
+        out_dim=1,
+        slice_num=32,
+        use_te=True,
+        plus: bool = False,
+        context_dim: int = 0,
+    ):
+        super().__init__()
+
+        if use_te and not TE_AVAILABLE:
+            raise ImportError(
+                "Transformer Engine is not installed. Please install it with: pip install transformer-engine>=0.1.0"
+            )
+
+        self.last_layer = last_layer
+        if use_te:
+            self.ln_1 = te.LayerNorm(hidden_dim)
+        else:
+            self.ln_1 = nn.LayerNorm(hidden_dim)
+
+        self.Attn = GALE(
+            hidden_dim,
+            heads=num_heads,
+            dim_head=hidden_dim // num_heads,
+            dropout=dropout,
+            slice_num=slice_num,
+            use_te=use_te,
+            plus=plus,
+            context_dim=context_dim,
+        )
+
+        if use_te:
+            self.ln_mlp1 = te.LayerNormMLP(
+                hidden_size=hidden_dim,
+                ffn_hidden_size=hidden_dim * mlp_ratio,
+            )
+        else:
+            self.ln_mlp1 = nn.Sequential(
+                nn.LayerNorm(hidden_dim),
+                MLP(
+                    hidden_dim,
+                    hidden_dim * mlp_ratio,
+                    hidden_dim,
+                    n_layers=0,
+                    res=False,
+                    act=act,
+                    use_te=False,
+                ),
+            )
+
+    def forward(self, fx: torch.Tensor, global_context: torch.Tensor) -> torch.Tensor:
+        r"""Forward pass of the GALE block.
+
+        Parameters
+        ----------
+        fx : torch.Tensor
+            Input tensor of shape \((B, N, C)\) where \(B\) is batch size, \(N\) is number of tokens, and \(C\) is hidden dimension.
+        global_context : torch.Tensor
+            Global context tensor for cross-attention of shape \((B, H, S_c, D_c)\) where \(H\) is number of heads, \(S_c\) is number of context slices, and \(D_c\) is context dimension.
+
+        Returns
+        -------
+        torch.Tensor
+            Output tensor of shape \((B, N, C)\), same shape as input.
+        """
+        fx = self.Attn(self.ln_1(fx), global_context) + fx
+        fx = self.ln_mlp1(fx) + fx
+
+        return fx
+
+
+@dataclass
+class TyphonMetaData(ModelMetaData):
+    """
+    Data class for storing essential meta data needed for the Typhon model.
+    """
+
+    name: str = "Typhon"
+    # Optimization
+    jit: bool = False
+    cuda_graphs: bool = False
+    amp: bool = True
+    # Inference
+    onnx_cpu: bool = False  # NOTE(review): "No FFT op on CPU" looks copied; confirm
+    onnx_gpu: bool = True
+    onnx_runtime: bool = True
+    # Physics informed
+    var_dim: int = 1
+    func_torch: bool = False
+    auto_grad: bool = False
+
+
+class ContextProjector(nn.Module):
+    r"""Projects context features onto physical state space.
+
+    This context projector is conceptually similar to half of a GALE attention layer.
+    It projects context values (geometry or global embeddings) onto a learned physical
+    state space, but unlike a full attention layer, it never projects back to the
+    original space. The projected features are used as context in all GALE blocks
+    of the Typhon model.
+
+    Parameters
+    ----------
+    dim : int
+        Input dimension of the context features.
+    heads : int, optional
+        Number of projection heads. Default is 8.
+    dim_head : int, optional
+        Dimension of each projection head. Default is 64.
+    dropout : float, optional
+        Dropout rate. Default is 0.0.
+    slice_num : int, optional
+        Number of learned physical state slices. Default is 64.
+    use_te : bool, optional
+        Whether to use Transformer Engine backend when available. Default is True.
+    plus : bool, optional
+        Whether to use Transolver++ features. Default is False.
+
+    Notes
+    -----
+    The global features are reused in all blocks of the model, so the learned
+    projections must capture globally useful features rather than layer-specific ones.
+
+    See Also
+    --------
+    :class:`GALE` : Full GALE attention layer that uses these projected context features.
+    :class:`Typhon` : Main model that uses ContextProjector for geometry and global embeddings.
+    """
+
+    def __init__(
+        self,
+        dim,
+        heads: int = 8,
+        dim_head: int = 64,
+        dropout: float = 0.0,
+        slice_num: int = 64,
+        use_te: bool = True,
+        plus: bool = False,
+    ):
+        super().__init__()
+        inner_dim = dim_head * heads
+        self.dim_head = dim_head
+        self.heads = heads
+        self.plus = plus
+        self.scale = dim_head**-0.5
+        self.use_te = use_te
+
+        # Keep below here:
+        if use_te:
+            self.in_project_x = te.Linear(dim, inner_dim)
+            if not plus:
+                self.in_project_fx = te.Linear(dim, inner_dim)
+        else:
+            self.in_project_x = nn.Linear(dim, inner_dim)
+            if not plus:
+                self.in_project_fx = nn.Linear(dim, inner_dim)
+
+        self.softmax = nn.Softmax(dim=-1)
+        self.dropout = nn.Dropout(dropout)
+        self.temperature = nn.Parameter(torch.ones([1, heads, 1, 1]) * 0.5)
+
+        if plus:
+            linear_layer = te.Linear if self.use_te else nn.Linear
+            self.proj_temperature = torch.nn.Sequential(
+                linear_layer(self.dim_head, slice_num),
+                nn.GELU(),
+                linear_layer(slice_num, 1),
+                nn.GELU(),
+            )
+
+        if self.use_te:
+            self.in_project_slice = te.Linear(dim_head, slice_num)
+        else:
+            self.in_project_slice = nn.Linear(dim_head, slice_num)
+
+    def project_input_onto_slices(
+        self, x: torch.Tensor
+    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        r"""Project the input onto the slice space.
+
+        Parameters
+        ----------
+        x : torch.Tensor
+            Input tensor of shape \((B, N, C)\) where \(B\) is batch size, \(N\) is number of tokens, and \(C\) is number of channels.
+
+        Returns
+        -------
+        torch.Tensor or tuple[torch.Tensor, torch.Tensor]
+            If ``plus=True``, returns single tensor ``x_mid`` of shape \((B, H, N, D)\) where \(H\) is number of heads and \(D\) is head dimension. If ``plus=False``, returns tuple ``(x_mid, fx_mid)`` both of shape \((B, H, N, D)\).
+        """
+        x_mid = rearrange(
+            self.in_project_x(x), "B N (h d) -> B h N d", h=self.heads, d=self.dim_head
+        )
+        if self.plus:
+            return x_mid
+        else:
+            fx_mid = rearrange(
+                self.in_project_fx(x),
+                "B N (h d) -> B h N d",
+                h=self.heads,
+                d=self.dim_head,
+            )
+
+            return x_mid, fx_mid
+
+    def compute_slices_from_projections(
+        self, slice_projections: torch.Tensor, fx: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        r"""Compute slice weights and slice tokens from input projections and latent features.
+
+        Parameters
+        ----------
+        slice_projections : torch.Tensor
+            Projected input tensor of shape \((B, H, N, S)\) where \(B\) is batch size, \(H\) is number of heads, \(N\) is number of tokens, and \(S\) is number of slices, representing the projection of each token onto each slice for each attention head.
+        fx : torch.Tensor
+            Latent feature tensor of shape \((B, H, N, D)\) where \(D\) is head dimension, representing the learned states to be aggregated by the slice weights.
+
+        Returns
+        -------
+        tuple[torch.Tensor, torch.Tensor]
+            - ``slice_weights``: Tensor of shape \((B, H, N, S)\), representing the normalized weights for each slice per token and head.
+            - ``slice_token``: Tensor of shape \((B, H, S, D)\), representing the aggregated latent features for each slice, head, and batch.
+
+        Notes
+        -----
+        Slice weights come from a temperature-scaled softmax over the last axis (or from
+        ``gumbel_softmax`` when ``plus=True``). Before aggregating ``fx``, the weights are divided
+        by their sum over the token axis plus ``1e-2``; the unnormalized weights are returned.
+        """
+
+        # Project the latent space vectors on to the weight computation space,
+        # and compute a temperature adjusted softmax.
+
+        if self.plus:
+            temperature = self.temperature + self.proj_temperature(fx)
+            clamped_temp = torch.clamp(temperature, min=0.01).to(
+                slice_projections.dtype
+            )
+            slice_weights = gumbel_softmax(
+                slice_projections, clamped_temp
+            )  # [Batch, N_heads, N_tokens, Slice_num]
+
+        else:
+            clamped_temp = torch.clamp(self.temperature, min=0.5, max=5).to(
+                slice_projections.dtype
+            )
+            slice_weights = nn.functional.softmax(
+                slice_projections / clamped_temp, dim=-1
+            )  # [Batch, N_heads, N_tokens, Slice_num]
+
+        # Cast to the computation type (since the parameter is probably fp32)
+        slice_weights = slice_weights.to(slice_projections.dtype)
+
+        # Aggregate the latent features fx into one token per slice using the weights:
+
+        # Computing the slice tokens is a matmul followed by a normalization.
+        # It can, unfortunately, overflow in reduced precision, so normalize first:
+        slice_norm = slice_weights.sum(2)  # [Batch, N_heads, Slice_num]
+        normed_weights = slice_weights / (slice_norm[:, :, None, :] + 1e-2)
+        slice_token = torch.matmul(normed_weights.transpose(2, 3), fx)
+
+        # Return the original weights, not the normed weights:
+        return slice_weights, slice_token
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        r"""Reduced forward pass projecting inputs to physical state slices.
+
+        This performs a partial physics attention operation: it projects the input onto
+        learned physical state slices but does not project back to the original space.
+
+        Parameters
+        ----------
+        x : torch.Tensor
+            Input tensor of shape \((B, N, C)\) where \(B\) is batch size, \(N\) is number of tokens, and \(C\) is number of channels.
+
+        Returns
+        -------
+        torch.Tensor
+            Slice tokens of shape \((B, H, S, D)\) where \(H\) is number of heads, \(S\) is number of slices, and \(D\) is head dimension.
+        """
+
+        # All of this is derived from the PhysicsAttention Layer
+
+        # Project the inputs onto learned spaces:
+        if self.plus:
+            x_mid = self.project_input_onto_slices(x)
+            # In Transolver++ there is no separate fx projection;
+            # x_mid is reused as the latent features to aggregate:
+            fx_mid = x_mid
+        else:
+            x_mid, fx_mid = self.project_input_onto_slices(x)
+
+        # Perform the linear projection of learned latent space onto slices:
+        slice_projections = self.in_project_slice(x_mid)
+
+        # Slice projections has shape [Batch, N_heads, N_tokens, Slice_num]
+
+        # Use the slice projections and learned spaces to compute the slices, and their weights:
+        _, slice_tokens = self.compute_slices_from_projections(
+            slice_projections, fx_mid
+        )
+        # _ (the slice weights) has shape [Batch, N_heads, N_tokens, Slice_num]
+        # slice_tokens has shape  [Batch, N_heads, Slice_num, head_dim]
+
+        return slice_tokens
+
+
+class Typhon(Module):
+    r"""Typhon: Geometry-Aware Physics Attention Transformer.
+
+    Typhon is an adaptation of the Transolver architecture, replacing standard attention
+    with GALE (Geometry-Aware Latent Embeddings) attention. GALE combines physics-aware
+    self-attention on learned state slices with cross-attention to geometry and global
+    context embeddings.
+
+    The model projects geometry and global features onto physical state spaces, which are
+    then used as context in all transformer blocks. This design enables the model to
+    incorporate geometric structure and global information throughout the forward pass.
+
+    Parameters
+    ----------
+    functional_dim : int
+        Dimension of the input values (local embeddings), not including global embeddings or geometry features. Input will be projected to ``n_hidden`` before processing.
+    out_dim : int
+        Dimension of the output of the model.
+    geometry_dim : int, optional
+        Pointwise dimension of the geometry input features. If provided, geometry features will be projected onto physical states and used as context in all GALE layers. Default is None.
+    global_dim : int, optional
+        Dimension of the global embedding features. If provided, global features will be projected onto physical states and used as context in all GALE layers. Default is None.
+    n_layers : int, optional
+        Number of GALE layers in the model. Default is 4.
+    n_hidden : int, optional
+        Hidden dimension of the transformer. Default is 256.
+    dropout : float, optional
+        Dropout rate applied across the GALE layers. Default is 0.0.
+    n_head : int, optional
+        Number of attention heads in each GALE layer. Must evenly divide ``n_hidden`` to yield an integer head dimension. Default is 8.
+    act : str, optional
+        Activation function name. Default is "gelu".
+    mlp_ratio : int, optional
+        Ratio of MLP hidden dimension to ``n_hidden``. Default is 4.
+    slice_num : int, optional
+        Number of learned physical state slices in the GALE layers, representing the number of learned states each layer should project inputs onto. Default is 32.
+    use_te : bool, optional
+        Whether to use Transformer Engine backend when available. Default is True.
+    time_input : bool, optional
+        Whether to include time embeddings. Default is False.
+    plus : bool, optional
+        Whether to use Transolver++ features in the GALE layers. Default is False.
+
+    Raises
+    ------
+    ValueError
+        If ``n_hidden`` is not evenly divisible by ``n_head``.
+
+
+    Forward
+    ----------
+    local_embedding : torch.Tensor
+        Local embedding of the input data of shape \((B, N, C)\) where \(B\) is batch size, \(N\) is number of nodes/tokens, and \(C\) is ``functional_dim``. Output will have the same \((B, N)\) shape but with ``out_dim`` channels.
+    global_embedding : torch.Tensor, optional
+        Global embedding of the input data of shape \((B, N_g, C_g)\) where \(N_g\) is number of global tokens and \(C_g\) is ``global_dim``. If None, global context is not used. Default is None.
+    geometry : torch.Tensor, optional
+        Geometry features of the input data of shape \((B, N, C_{geo})\) where \(C_{geo}\) is ``geometry_dim``. If None, geometry context is not used. Default is None.
+    time : torch.Tensor, optional
+        Time embedding (currently not implemented). Default is None.
+
+    Returns
+    -------
+    torch.Tensor
+        Output tensor of shape \((B, N, C_{out})\) where \(C_{out}\) is ``out_dim``.
+
+    Notes
+    -----
+    Typhon currently supports unstructured mesh input only. Enhancements for image-based
+    and voxel-based inputs may be available in the future.
+
+    For more details on Transolver, see:
+    - https://arxiv.org/pdf/2402.02366
+    - https://arxiv.org/pdf/2502.02414
+
+    See Also
+    --------
+    :class:`GALE` : The attention mechanism used in Typhon.
+    :class:`GALE_block` : Transformer block using GALE attention.
+    :class:`ContextProjector` : Projects context features onto physical states.
+
+    Examples
+    --------
+    Basic usage with local embeddings only:
+
+    >>> import torch
+    >>> from physicsnemo.experimental.models.typhon import Typhon
+    >>> model = Typhon(
+    ...     functional_dim=64,
+    ...     out_dim=3,
+    ...     n_hidden=256,
+    ...     n_layers=4
+    ... )
+    >>> local_emb = torch.randn(2, 1000, 64)  # (batch, nodes, features)
+    >>> output = model(local_emb)
+    >>> output.shape
+    torch.Size([2, 1000, 3])
+
+    Usage with geometry and global context:
+
+    >>> model = Typhon(
+    ...     functional_dim=64,
+    ...     out_dim=3,
+    ...     geometry_dim=3,
+    ...     global_dim=16,
+    ...     n_hidden=256,
+    ...     n_layers=4
+    ... )
+    >>> local_emb = torch.randn(2, 1000, 64)
+    >>> geometry = torch.randn(2, 1000, 3)  # (batch, nodes, spatial_dim)
+    >>> global_emb = torch.randn(2, 1, 16)  # (batch, 1, global_features)
+    >>> output = model(local_emb, global_embedding=global_emb, geometry=geometry)
+    >>> output.shape
+    torch.Size([2, 1000, 3])
+    """
+
+    def __init__(
+        self,
+        functional_dim: int,
+        out_dim: int,
+        geometry_dim: int | None = None,
+        global_dim: int | None = None,
+        n_layers: int = 4,
+        n_hidden: int = 256,
+        dropout: float = 0.0,
+        n_head: int = 8,
+        act: str = "gelu",
+        mlp_ratio: int = 4,
+        slice_num: int = 32,
+        use_te: bool = True,
+        time_input: bool = False,
+        plus: bool = False,
+    ) -> None:
+        super().__init__(meta=TyphonMetaData())
+        self.__name__ = "Typhon"
+
+        self.use_te = use_te
+        # Check that the hidden dimension and head dimensions are compatible:
+        if not n_hidden % n_head == 0:
+            raise ValueError(
+                f"Typhon requires n_hidden % n_head == 0, but instead got {n_hidden % n_head}"
+            )
+
+        # These are to project geometry embeddings and global embeddings onto
+        # a physical state space:
+        context_dim = 0
+        if geometry_dim is not None:
+            self.geometry_tokenizer = ContextProjector(
+                geometry_dim,
+                n_head,
+                n_hidden // n_head,
+                dropout,
+                slice_num,
+                use_te,
+                plus,
+            )
+            context_dim += n_hidden // n_head
+        if global_dim is not None:
+            self.global_tokenizer = ContextProjector(
+                global_dim, n_head, n_hidden // n_head, dropout, slice_num, use_te, plus
+            )
+            context_dim += n_hidden // n_head
+
+        # This MLP is the initial projection onto the hidden space
+        self.preprocess = MLP(
+            functional_dim,
+            n_hidden * 2,
+            n_hidden,
+            n_layers=0,
+            res=False,
+            act=act,
+            use_te=use_te,
+        )
+
+        self.n_hidden = n_hidden
+
+        self.blocks = nn.ModuleList(
+            [
+                GALE_block(
+                    num_heads=n_head,
+                    hidden_dim=n_hidden,
+                    dropout=dropout,
+                    act=act,
+                    mlp_ratio=mlp_ratio,
+                    slice_num=slice_num,
+                    last_layer=(_ == n_layers - 1),
+                    use_te=use_te,
+                    plus=plus,
+                    context_dim=context_dim,
+                )
+                for _ in range(n_layers)
+            ]
+        )
+
+        if use_te:
+            self.ln_mlp_out = te.LayerNormLinear(
+                in_features=n_hidden, out_features=out_dim
+            )
+        else:
+            self.ln_mlp_out = nn.Sequential(
+                nn.LayerNorm(n_hidden),
+                nn.Linear(n_hidden, out_dim),
+            )
+
+        self.time_input = time_input
+        if time_input:
+            self.time_fc = nn.Sequential(
+                nn.Linear(n_hidden, n_hidden), nn.SiLU(), nn.Linear(n_hidden, n_hidden)
+            )
+
+    def forward(
+        self,
+        local_embedding: torch.Tensor,
+        global_embedding: torch.Tensor | None = None,
+        geometry: torch.Tensor | None = None,
+        time: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        r"""Forward pass of the Typhon model.
+
+        Geometry and global features, when given, are projected onto physical state
+        slices and concatenated along the last dimension to form the context used
+        for cross-attention in every GALE block. When neither is given, the context
+        is ``None`` and the blocks use self-attention only. ``time`` is accepted
+        but not used by this method. Returns the output of ``ln_mlp_out``.
+        """
+
+        # First, construct the global context vectors:
+        global_context_input = []
+
+        if geometry is not None:
+            global_context_input.append(self.geometry_tokenizer(geometry))
+
+        if global_embedding is not None:
+            global_context_input.append(self.global_tokenizer(global_embedding))
+
+        # Construct the embedding states. Default to None so that a call without
+        # any context does not hit an unbound local in the block loop below:
+        embedding_states = None
+        if global_context_input:
+            embedding_states = torch.cat(global_context_input, dim=-1)
+
+        # Project the inputs to the hidden dimension:
+        x = self.preprocess(local_embedding)
+
+        for block in self.blocks:
+            x = block(x, embedding_states)
+
+        # Final layer norm and linear projection to the output dimension:
+        x = self.ln_mlp_out(x)
+
+        return x

From 7a473e5a5b230be3c81327793fa0845f142d05cc Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 1 Dec 2025 13:34:46 -0800
Subject: [PATCH 02/32] Add typhon example configs.

---
 .../transolver/src/conf/typhon_surface.yaml   | 105 ++++++++++++++++++
 .../transolver/src/conf/typhon_volume.yaml    | 104 +++++++++++++++++
 2 files changed, 209 insertions(+)
 create mode 100644 examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml
 create mode 100644 examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml

diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml
new file mode 100644
index 0000000000..57713b7ac2
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml
@@ -0,0 +1,105 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+
+output_dir: "runs"
+checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
+run_id: "typhon/surface/bfloat16"
+
+# Performance considerations:
+precision: bfloat16 # float32, float16, bfloat16, or float8
+compile: true
+profile: false
+
+# Training configuration
+training:
+
+  num_epochs: 501 # Add one to save at 500
+  save_interval: 25  # Save checkpoint every N epochs
+
+  # StepLR scheduler: Decays the learning rate by gamma every step_size epochs
+  scheduler:
+    name: "StepLR"
+    params:
+      step_size: 100    # Decay every 100 epochs (adjust as desired)
+      gamma: 0.5        # Decay factor
+
+
+  # Optimizer configuration
+  optimizer:
+    _target_: torch.optim.AdamW
+    lr: 1.0e-3
+    weight_decay: 1.0e-4
+    betas: [0.9, 0.999]
+    eps: 1.0e-8
+
+# Model configuration
+model:
+  _target_: physicsnemo.experimental.models.typhon.Typhon
+  functional_dim: 6 # Input feature dimension
+  global_dim: 2
+  geometry_dim: 3
+  out_dim: 4        # Output feature dimension
+  n_layers: 8       # Number of transformer layers
+  n_hidden: 256     # Hidden dimension
+  dropout: 0.0      # Dropout rate
+  n_head: 8         # Number of attention heads
+  act: "gelu"       # Activation function
+  mlp_ratio: 2     # MLP ratio in attention blocks
+  slice_num: 512     # Number of slices in physics attention
+  use_te: false     # Use transformer engine
+  plus: false
+
+
+# Data configuration
+data:
+  train:
+    data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/train/
+  val:
+    data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/val/
+  max_workers: 8
+  normalization_dir: "src/"  # Directory for normalization files
+  preload_depth: 1
+  pin_memory: true
+  resolution: 200_000
+  mode: surface
+  # Preprocessing switches:
+  # (Changing these will change the embedding dim)
+  data_keys:
+    - "surface_fields"
+    - "surface_areas"
+    - "surface_mesh_centers"
+    - "surface_normals"
+    - "air_density"
+    - "stream_velocity"
+    - "stl_faces"
+    - "stl_centers"
+    - "stl_coordinates"
+  include_geometry: true
+  include_normals: true
+  include_sdf: false
+  translational_invariance: true
+  scale_invariance: true
+  reference_scale: [12.0, 4.5, 3.25]
+  geometry_sampling: 300_000
+  broadcast_global_features: false
+  return_mesh_features: False
+
+# Logging configuration
+logging:
+  level: INFO
+  format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml
new file mode 100644
index 0000000000..a4dd02408a
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml
@@ -0,0 +1,104 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+
+output_dir: "runs"
+checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
+run_id: "volumeX/fake-name"
+
+# Performance considerations:
+precision: bfloat16 # float32, float16, bfloat16, or float8
+compile: false
+profile: false
+
+# Training configuration
+training:
+  num_epochs: 501 # Add one to save at 500
+  save_interval: 25  # Save checkpoint every N epochs
+
+  # StepLR scheduler: Decays the learning rate by gamma every step_size epochs
+  scheduler:
+    name: "StepLR"
+    params:
+      step_size: 100    # Decay every 100 epochs (adjust as desired)
+      gamma: 0.5        # Decay factor
+
+
+  # Optimizer configuration
+  optimizer:
+    _target_: torch.optim.AdamW
+    lr: 1.0e-3
+    weight_decay: 1.0e-4
+    betas: [0.9, 0.999]
+    eps: 1.0e-8
+
+
+# Model configuration
+model:
+  _target_: physicsnemo.experimental.models.typhon.Typhon
+  functional_dim: 7 # Input feature dimension
+  global_dim: 2
+  geometry_dim: 3
+  out_dim: 5        # Output feature dimension
+  n_layers: 6       # Number of transformer layers
+  n_hidden: 256     # Hidden dimension
+  dropout: 0.0      # Dropout rate
+  n_head: 8         # Number of attention heads
+  act: "gelu"       # Activation function
+  mlp_ratio: 2      # MLP ratio in attention blocks
+  slice_num: 512     # Number of slices in physics attention
+  use_te: false     # Use transformer engine
+  plus: false
+
+
+
+# Data configuration
+data:
+  train:
+    data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/train/
+  val:
+    data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/val/
+  max_workers: 8
+  normalization_dir: "src/"  # Directory for normalization files
+  preload_depth: 1
+  volume_sample_from_disk: true # Enable faster IO on pre-shuffled volumetric data
+  pin_memory: true
+  resolution: 100_000
+  # Preprocessing switches:
+  # (Changing these will change the embedding dim)
+  include_geometry: true
+  include_normals: true
+  include_sdf: true
+  translational_invariance: true
+  scale_invariance: true
+  reference_scale: [12.0, 4.5, 3.25]
+  geometry_sampling: 300_000
+  broadcast_global_features: false
+  mode: volume
+  data_keys:
+    - "volume_fields"
+    - "volume_mesh_centers"
+    - "air_density"
+    - "stream_velocity"
+    - "stl_faces"
+    - "stl_centers"
+    - "stl_coordinates"
+
+# Logging configuration
+logging:
+  level: INFO
+  format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

From c0983f812fe5ef379712abb623c872afc3e5db5d Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Tue, 2 Dec 2025 07:36:28 -0800
Subject: [PATCH 03/32] Enable typhon to work with multiple streams of data.

---
 .../datapipes/cae/transolver_datapipe.py      |  96 +++++++++---
 .../experimental/models/typhon/typhon.py      | 137 ++++++++++++------
 2 files changed, 169 insertions(+), 64 deletions(-)

diff --git a/physicsnemo/datapipes/cae/transolver_datapipe.py b/physicsnemo/datapipes/cae/transolver_datapipe.py
index 8a4faae344..083d5e98c8 100644
--- a/physicsnemo/datapipes/cae/transolver_datapipe.py
+++ b/physicsnemo/datapipes/cae/transolver_datapipe.py
@@ -69,7 +69,7 @@ class TransolverDataConfig:
     """
 
     data_path: Path | None
-    model_type: Literal["surface", "volume"] = "surface"
+    model_type: Literal["surface", "volume", "combined"] = "surface"
     resolution: int = 200_000
 
     # Control what features are added to the inputs to the model:
@@ -82,7 +82,8 @@ class TransolverDataConfig:
 
     # For controlling the normalization of target values:
     scaling_type: Optional[Literal["min_max_scaling", "mean_std_scaling"]] = None
-    normalization_factors: Optional[torch.Tensor] = None
+    surface_factors: Optional[torch.Tensor] = None
+    volume_factors: Optional[torch.Tensor] = None
 
     ############################################################
     # Translation invariance configuration:
@@ -230,7 +231,7 @@ def preprocess_surface_data(
             fields = fields[idx]
 
         if self.config.scaling_type is not None:
-            fields = self.scale_model_targets(fields, self.config.normalization_factors)
+            fields = self.scale_model_targets(fields, self.config.surface_factors)
 
         return {
             "embeddings": embeddings,
@@ -333,7 +334,7 @@ def preprocess_volume_data(
             fields = fields[idx]
 
         if self.config.scaling_type is not None:
-            fields = self.scale_model_targets(fields, self.config.normalization_factors)
+            fields = self.scale_model_targets(fields, self.config.volume_factors)
 
         return {
             "embeddings": embeddings,
@@ -422,7 +423,7 @@ def process_data(self, data_dict):
             "stl_centers",
         ]
 
-        if self.config.model_type == "volume":
+        if self.config.model_type == "volume" or self.config.model_type == "combined":
             # We need these for the SDF calculation:
             required_keys.extend(
                 [
@@ -430,7 +431,9 @@ def process_data(self, data_dict):
                     "stl_faces",
                 ]
             )
-        elif self.config.model_type == "surface":
+        elif (
+            self.config.model_type == "surface" or self.config.model_type == "combined"
+        ):
             required_keys.extend(
                 [
                     "surface_normals",
@@ -446,15 +449,20 @@ def process_data(self, data_dict):
         else:
             center_of_mass = None
 
-        field_key = f"{self.config.model_type}_fields"
-        coords_key = f"{self.config.model_type}_mesh_centers"
-
-        required_keys.extend(
-            [
-                field_key,
-                coords_key,
-            ]
-        )
+        if self.config.model_type == "surface" or self.config.model_type == "combined":
+            required_keys.extend(
+                [
+                    "surface_fields",
+                    "surface_mesh_centers",
+                ]
+            )
+        elif self.config.model_type == "volume" or self.config.model_type == "combined":
+            required_keys.extend(
+                [
+                    "volume_fields",
+                    "volume_mesh_centers",
+                ]
+            )
 
         missing_keys = [key for key in required_keys if key not in data_dict]
         if missing_keys:
@@ -471,10 +479,39 @@ def process_data(self, data_dict):
             outputs = self.preprocess_surface_data(
                 data_dict, center_of_mass, scale_factor
             )
+            outputs["embeddings"] = [
+                outputs["embeddings"],
+            ]
+            outputs["fields"] = [
+                outputs["fields"],
+            ]
         elif self.config.model_type == "volume":
             outputs = self.preprocess_volume_data(
                 data_dict, center_of_mass, scale_factor
             )
+            outputs["embeddings"] = [
+                outputs["embeddings"],
+            ]
+            outputs["fields"] = [
+                outputs["fields"],
+            ]
+        elif self.config.model_type == "combined":
+            outputs_surf = self.preprocess_surface_data(
+                data_dict, center_of_mass, scale_factor
+            )
+
+            outputs_vol = self.preprocess_volume_data(
+                data_dict, center_of_mass, scale_factor
+            )
+
+            outputs = {}
+            outputs["embeddings"] = [
+                outputs_surf["embeddings"],
+                outputs_vol["embeddings"],
+            ]
+            # This should be the same in either:
+            outputs["fx"] = outputs_surf["fx"]
+            outputs["fields"] = [outputs_surf["fields"], outputs_vol["fields"]]
 
         if self.config.include_geometry:
             outputs["geometry"] = self.process_geometry(
@@ -512,6 +549,7 @@ def unscale_model_targets(
         fields: torch.Tensor | None = None,
         air_density: torch.Tensor | None = None,
         stream_velocity: torch.Tensor | None = None,
+        factor_type: Literal["surface", "volume", "auto"] = "auto",
     ):
         """
         Unscale the model outputs based on the configured scaling factors.
@@ -521,7 +559,18 @@ def unscale_model_targets(
 
         """
 
-        factors = self.config.normalization_factors
+        match factor_type:
+            case "surface":
+                factors = self.config.surface_factors
+            case "volume":
+                factors = self.config.volume_factors
+            case "auto":
+                if self.config.model_type == "surface":
+                    factors = self.config.surface_factors
+                elif self.config.model_type == "volume":
+                    factors = self.config.volume_factors
+                else:
+                    raise ValueError(f"Invalid model type {self.config.model_type}")
 
         if self.config.scaling_type == "mean_std_scaling":
             field_mean = factors["mean"]
@@ -591,9 +640,11 @@ def __call__(self, data_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor
 
         """
         outputs = self.process_data(data_dict)
-
         for key in outputs.keys():
-            outputs[key] = outputs[key].unsqueeze(0)
+            if isinstance(outputs[key], list):
+                outputs[key] = [item.unsqueeze(0) for item in outputs[key]]
+            else:
+                outputs[key] = outputs[key].unsqueeze(0)
 
         return outputs
 
@@ -610,10 +661,8 @@ def __iter__(self):
 def create_transolver_dataset(
     cfg: DictConfig,
     phase: Literal["train", "val", "test"],
-    # keys_to_read: list[str],
-    # keys_to_read_if_available: dict[str, torch.Tensor],
-    scaling_factors: list[float],
-    # normalize_coordinates: bool = True,
+    surface_factors: dict[str, torch.Tensor] | None = None,
+    volume_factors: dict[str, torch.Tensor] | None = None,
     device_mesh: torch.distributed.DeviceMesh | None = None,
     placements: dict[str, torch.distributed.tensor.Placement] | None = None,
 ):
@@ -694,7 +743,8 @@ def create_transolver_dataset(
     datapipe = TransolverDataPipe(
         input_path,
         resolution=cfg.resolution,
-        normalization_factors=scaling_factors,
+        surface_factors=surface_factors,
+        volume_factors=volume_factors,
         model_type=model_type,
         scaling_type="mean_std_scaling",
         **overrides,
diff --git a/physicsnemo/experimental/models/typhon/typhon.py b/physicsnemo/experimental/models/typhon/typhon.py
index 4351a17e26..45af3f573f 100644
--- a/physicsnemo/experimental/models/typhon/typhon.py
+++ b/physicsnemo/experimental/models/typhon/typhon.py
@@ -15,6 +15,7 @@
 # limitations under the License.
 
 from dataclasses import dataclass
+from collections.abc import Sequence
 
 import torch
 import torch.nn as nn
@@ -131,7 +132,10 @@ def compute_slice_attention_cross(
         """
 
         # Project the slice and context tokens:
-        q = self.cross_q(slice_tokens)
+        
+        q_input = torch.cat(slice_tokens, dim=-2)
+        q = self.cross_q(q_input)
+        
         k = self.cross_k(context)
         v = self.cross_v(context)
 
@@ -142,11 +146,13 @@ def compute_slice_attention_cross(
             cross_attention = torch.nn.functional.scaled_dot_product_attention(
                 q, k, v, is_causal=False
             )
+        cross_attention = torch.split(cross_attention, slice_tokens[0].shape[-2], dim=-2)
+
 
         return cross_attention
 
     def forward(
-        self, x: torch.Tensor, context: torch.Tensor | None = None
+        self, x: tuple[torch.Tensor, ...], context: torch.Tensor | None = None
     ) -> torch.Tensor:
         r"""Forward pass of the GALE module.
 
@@ -162,34 +168,29 @@ def forward(
         torch.Tensor
             Output tensor of shape \((B, N, C)\), same shape as input.
         """
-
         # Project the inputs onto learned spaces:
         if self.plus:
-            x_mid = self.project_input_onto_slices(x)
+            x_mid = [ self.project_input_onto_slices(_x) for _x in x ]
             # In transolver ++, fx_mid is gone.
             # x_mid is used to compute the projections instead:
-            fx_mid = x_mid
+            fx_mid = [ _x_mid for _x_mid in x_mid ]
         else:
-            x_mid, fx_mid = self.project_input_onto_slices(x)
+            x_mid, fx_mid = zip(*[ self.project_input_onto_slices(_x) for _x in x ])
 
         # Perform the linear projection of learned latent space onto slices:
-        slice_projections = self.in_project_slice(x_mid)
+        slice_projections = [ self.in_project_slice(_x_mid) for _x_mid in x_mid ]
 
         # Slice projections has shape [B, N_head, N_tokens, Head_dim], but head_dim may have changed!
-
         # Use the slice projections and learned spaces to compute the slices, and their weights:
-        slice_weights, slice_tokens = self.compute_slices_from_projections(
-            slice_projections, fx_mid
-        )
+        slice_weights, slice_tokens = zip(*[self.compute_slices_from_projections(proj, _fx_mid) for proj, _fx_mid in zip(slice_projections, fx_mid)])
         # slice_weights has shape [Batch, N_heads, N_tokens, Slice_num]
         # slice_tokens has shape  [Batch, N_heads, N_tokens, head_dim]
-
         # Apply attention to the slice tokens
         if self.use_te:
-            self_slice_token = self.compute_slice_attention_te(slice_tokens)
+            self_slice_token = [ self.compute_slice_attention_te(_slice_token) for _slice_token in slice_tokens ]
         else:
-            self_slice_token = self.compute_slice_attention_sdpa(slice_tokens)
-
+            self_slice_token = [ self.compute_slice_attention_sdpa(_slice_token) for _slice_token in slice_tokens ]
+        
         # HERE, we are differing: apply cross-attention with physical states:
         if context is not None:
             cross_slice_token = self.compute_slice_attention_cross(
@@ -198,10 +199,9 @@ def forward(
 
             # Apply learnable mixing:
             mixing_weight = torch.sigmoid(self.state_mixing)
-            out_slice_token = (
-                mixing_weight * self_slice_token
-                + (1 - mixing_weight) * cross_slice_token
-            )
+            out_slice_token = [ mixing_weight * sst + (1 - mixing_weight) * cst
+                for sst, cst in zip(self_slice_token, cross_slice_token)
+            ]
 
         else:
             # Just keep self attention:
@@ -210,7 +210,9 @@ def forward(
         # Shape unchanged
 
         # Deslice:
-        outputs = self.project_attention_outputs(out_slice_token, slice_weights)
+        outputs = [
+            self.project_attention_outputs(ost, sw) for ost, sw in zip(out_slice_token, slice_weights)
+        ]
 
         # Outputs now has the same shape as the original input x
 
@@ -327,8 +329,13 @@ def forward(self, fx: torch.Tensor, global_context: torch.Tensor) -> torch.Tenso
         torch.Tensor
             Output tensor of shape \((B, N, C)\), same shape as input.
         """
-        fx = self.Attn(self.ln_1(fx), global_context) + fx
-        fx = self.ln_mlp1(fx) + fx
+        
+        normed_inputs = [ self.ln_1(_fx) for _fx in fx ]
+        attn = self.Attn(normed_inputs, global_context)
+        
+        fx = [ attn[i] + normed_inputs[i] for i in range(len(normed_inputs)) ]
+        
+        fx = [ self.ln_mlp1(_fx) + _fx for _fx in fx ]
 
         return fx
 
@@ -569,6 +576,25 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return slice_tokens
 
 
+
+def _normalize_dim(x):
+    # A bare int is wrapped into a one-element tuple
+    if isinstance(x, int):
+        return (x,)
+    # Any non-string sequence is converted to a tuple of ints
+    if isinstance(x, Sequence) and not isinstance(x, (str, bytes)):
+        return tuple(int(v) for v in x)
+    raise TypeError(f"Invalid dim specifier {x!r}")
+
+
+def _normalize_tensor(x):
+    # A single tensor is wrapped into a one-element tuple
+    if isinstance(x, torch.Tensor):
+        return (x,)
+    if isinstance(x, Sequence):
+        return x  # sequences are returned as-is; elements are not checked
+    raise TypeError(f"Invalid tensor structure")
+
 class Typhon(Module):
     r"""Typhon: Geometry-Aware Physics Attention Transformer.
 
@@ -686,8 +712,8 @@ class Typhon(Module):
 
     def __init__(
         self,
-        functional_dim: int,
-        out_dim: int,
+        functional_dim: int | tuple[int, ...],
+        out_dim: int | tuple[int, ...],
         geometry_dim: int | None = None,
         global_dim: int | None = None,
         n_layers: int = 4,
@@ -731,17 +757,33 @@ def __init__(
             )
             context_dim += n_hidden // n_head
 
-        # This MLP is the initial projection onto the hidden space
-        self.preprocess = MLP(
-            functional_dim,
-            n_hidden * 2,
-            n_hidden,
-            n_layers=0,
-            res=False,
-            act=act,
-            use_te=use_te,
+        functional_dims = _normalize_dim(functional_dim)
+        out_dims = _normalize_dim(out_dim)
+      
+        if len(functional_dims) != len(out_dims):
+            raise ValueError(
+                f"functional_dim and out_dim must be the same length, but instead got {len(functional_dims)} and {len(out_dims)}"
+            )
+      
+        # This MLP is the initial projection onto the hidden space.
+        # One per input "type"
+        
+        self.preprocess = nn.ModuleList(
+            [
+                MLP(
+                    f,
+                    n_hidden * 2,
+                    n_hidden,
+                    n_layers=0,
+                    res=False,
+                    act=act,
+                    use_te=use_te,
+                )
+                for f in functional_dims
+            ]
         )
 
+
         self.n_hidden = n_hidden
 
         self.blocks = nn.ModuleList(
@@ -763,15 +805,26 @@ def __init__(
         )
 
         if use_te:
-            self.ln_mlp_out = te.LayerNormLinear(
-                in_features=n_hidden, out_features=out_dim
+            self.ln_mlp_out = nn.ModuleList(
+                [
+                    te.LayerNormLinear(
+                        in_features=n_hidden, out_features=o
+                    ) for o in out_dims
+                 ]
             )
         else:
-            self.ln_mlp_out = nn.Sequential(
-                nn.LayerNorm(n_hidden),
-                nn.Linear(n_hidden, out_dim),
+            self.ln_mlp_out = nn.ModuleList(
+                [
+                    nn.Sequential(
+                        nn.LayerNorm(n_hidden),
+                        nn.Linear(n_hidden, o),
+                    )
+                    for o in out_dims
+                ]
+                
             )
 
+
         self.time_input = time_input
         if time_input:
             self.time_fc = nn.Sequential(
@@ -780,7 +833,7 @@ def __init__(
 
     def forward(
         self,
-        local_embedding: torch.Tensor,
+        local_embedding: torch.Tensor | tuple[torch.Tensor, ...],
         global_embedding: torch.Tensor | None = None,
         geometry: torch.Tensor | None = None,
         time: torch.Tensor | None = None,
@@ -809,13 +862,15 @@ def forward(
         if len(global_context_input) > 0:
             embedding_states = torch.cat(global_context_input, dim=-1)
 
+        local_embedding = _normalize_tensor(local_embedding)
+
         # Project the inputs to the hidden dimension:
-        x = self.preprocess(local_embedding)
+        x = [ self.preprocess[i](le) for i, le in enumerate(local_embedding) ]
 
         for block in self.blocks:
             x = block(x, embedding_states)
 
         # Now, pass the data through the model:
-        x = self.ln_mlp_out(x)
+        x = [self.ln_mlp_out[i](x[i]) for i in range(len(x))]
 
         return x

From da69cb6fe009b1980b84055067f98b676c4fcf05 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Tue, 2 Dec 2025 08:49:51 -0800
Subject: [PATCH 04/32] Clean up configs and attempt to remove duplications

---
 .../src/conf/datapipe/combined.yaml           | 35 +++++++
 .../transolver/src/conf/datapipe/core.yaml    | 60 ++++++++++++
 .../transolver/src/conf/datapipe/surface.yaml | 33 +++++++
 .../transolver/src/conf/datapipe/volume.yaml  | 31 +++++++
 .../transolver/src/conf/model/transolver.yaml | 34 +++++++
 .../transolver/src/conf/model/typhon.yaml     | 31 +++++++
 .../transolver/src/conf/training/base.yaml    | 32 +++++++
 .../src/conf/transolver_surface.yaml          | 79 +---------------
 .../src/conf/transolver_volume.yaml           | 75 ++-------------
 .../transolver/src/conf/typhon_combined.yaml  | 47 ++++++++++
 .../transolver/src/conf/typhon_surface.yaml   | 79 ++--------------
 .../transolver/src/conf/typhon_volume.yaml    | 80 ++--------------
 .../transolver/src/train.py                   | 91 +++++++++++++------
 13 files changed, 394 insertions(+), 313 deletions(-)
 create mode 100644 examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/combined.yaml
 create mode 100644 examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/core.yaml
 create mode 100644 examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/surface.yaml
 create mode 100644 examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/volume.yaml
 create mode 100644 examples/cfd/external_aerodynamics/transolver/src/conf/model/transolver.yaml
 create mode 100644 examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml
 create mode 100644 examples/cfd/external_aerodynamics/transolver/src/conf/training/base.yaml
 create mode 100644 examples/cfd/external_aerodynamics/transolver/src/conf/typhon_combined.yaml

diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/combined.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/combined.yaml
new file mode 100644
index 0000000000..e4bcbd16c1
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/combined.yaml
@@ -0,0 +1,35 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# You may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+defaults:
+  -  core
+
+# Overrides for combined data:
+mode: combined
+
+# Combined-specific needs:
+data_keys:
+  - "volume_fields"
+  - "volume_mesh_centers"
+  - "surface_fields"
+  - "surface_mesh_centers"
+  - "surface_normals"
+  - "surface_areas"
+  - "air_density"
+  - "stream_velocity"
+  - "stl_faces"
+  - "stl_centers"
+  - "stl_coordinates"
\ No newline at end of file
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/core.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/core.yaml
new file mode 100644
index 0000000000..83c2b9fa6e
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/core.yaml
@@ -0,0 +1,60 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# You may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Paths to your data:
+train:
+  data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/train/
+val:
+  data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/val/
+
+# You can set a normalization factor directory:
+normalization_dir: "src/"
+
+# How many events in advance should we be preloading?
+preload_depth: 1
+
+# Pin memory for GPU transfers?
+pin_memory: true
+
+# Sampling resolution of the point clouds:
+resolution: 100_000
+
+# Surface / Volume / (combined, if supported)
+mode: ???
+
+# For building embeddings: include normal directions for each point?
+include_normals: true
+# Include SDF?  (It's 0 for surface data...)
+include_sdf: true
+# Apply translation invariance via center-of-mass subtraction?
+translational_invariance: true
+# Rescale x/y/z inputs to the model for scale invariance?
+scale_invariance: true
+reference_scale: [12.0, 4.5, 3.25]
+
+# Which parts of the data files to read?  No need to read everything, all the time.
+data_keys: ???
+
+# Load and return the STL geometry info in the dataloader?
+include_geometry: false
+
+# Broadcast global features to the same resolution as points?
+broadcast_global_features: true
+
+# Return the mesh areas and normals?  You don't usually want this for training.
+# We switch it on automatically for inference.
+return_mesh_features: false
+
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/surface.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/surface.yaml
new file mode 100644
index 0000000000..8363a7a9dc
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/surface.yaml
@@ -0,0 +1,33 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# You may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+defaults:
+  -  core
+
+# Overrides for surface data:
+mode: surface
+
+# Surface-specific needs:
+data_keys:
+  - "surface_fields"
+  - "surface_mesh_centers"
+  - "surface_normals"
+  - "surface_areas"
+  - "air_density"
+  - "stream_velocity"
+  - "stl_faces"
+  - "stl_centers"
+  - "stl_coordinates"
\ No newline at end of file
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/volume.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/volume.yaml
new file mode 100644
index 0000000000..b222fda5f9
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/volume.yaml
@@ -0,0 +1,31 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+defaults:
+  -  core
+
+# Overrides for volume data:
+mode: volume
+
+# Volume-specific needs:
+data_keys:
+  - "volume_fields"
+  - "volume_mesh_centers"
+  - "air_density"
+  - "stream_velocity"
+  - "stl_faces"
+  - "stl_centers"
+  - "stl_coordinates"
\ No newline at end of file
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/model/transolver.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/model/transolver.yaml
new file mode 100644
index 0000000000..c43fb8560c
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/model/transolver.yaml
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+_target_: physicsnemo.models.transolver.Transolver
+functional_dim: 2
+out_dim: 4
+embedding_dim: 6
+n_layers: 8
+n_hidden: 256
+dropout: 0.0
+n_head: 8
+act: "gelu"
+mlp_ratio: 2
+slice_num: 512
+unified_pos: false
+ref: 8
+structured_shape: null
+use_te: false
+time_input: false
+plus: false
+
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml
new file mode 100644
index 0000000000..166cf7bded
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml
@@ -0,0 +1,31 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+_target_: physicsnemo.experimental.models.typhon.Typhon
+functional_dim: 6
+global_dim: 2
+geometry_dim: 3
+out_dim: 4
+n_layers: 8
+n_hidden: 256
+dropout: 0.0
+n_head: 8
+act: "gelu"
+mlp_ratio: 2
+slice_num: 512
+use_te: false
+plus: false
+
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/training/base.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/training/base.yaml
new file mode 100644
index 0000000000..18797ea051
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/training/base.yaml
@@ -0,0 +1,32 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+num_epochs: 501
+save_interval: 25
+
+scheduler:
+  name: "StepLR"
+  params:
+    step_size: 100
+    gamma: 0.5
+
+optimizer:
+  _target_: torch.optim.AdamW
+  lr: 1.0e-3
+  weight_decay: 1.0e-4
+  betas: [0.9, 0.999]
+  eps: 1.0e-8
+
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/transolver_surface.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/transolver_surface.yaml
index db7f5938c9..797e004e86 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/transolver_surface.yaml
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/transolver_surface.yaml
@@ -14,7 +14,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
+defaults:
+  - training: base
+  - model: transolver
+  - datapipe: surface
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
@@ -25,80 +28,6 @@ precision: bfloat16 # float32, float16, bfloat16, or float8
 compile: true
 profile: false
 
-# Training configuration
-training:
-  num_epochs: 501 # Add one to save at 250
-  save_interval: 25  # Save checkpoint every N epochs
-
-  # StepLR scheduler: Decays the learning rate by gamma every step_size epochs
-  scheduler:
-    name: "StepLR"
-    params:
-      step_size: 100    # Decay every 200 epochs (set X as desired)
-      gamma: 0.5        # Decay factor
-
-  # Optimizer configuration
-  optimizer:
-    _target_: torch.optim.AdamW
-    lr: 1.0e-3
-    weight_decay: 1.0e-4
-    betas: [0.9, 0.999]
-    eps: 1.0e-8
-
-# Model configuration
-model:
-  _target_: physicsnemo.models.transolver.Transolver
-  functional_dim: 2 # Input feature dimension
-  out_dim: 4        # Output feature dimension
-  embedding_dim: 6  # Spatial embedding dimension
-  n_layers: 8       # Number of transformer layers
-  n_hidden: 256     # Hidden dimension
-  dropout: 0.0      # Dropout rate
-  n_head: 8         # Number of attention heads
-  act: "gelu"       # Activation function
-  mlp_ratio: 2      # MLP ratio in attention blocks
-  slice_num: 512     # Number of slices in physics attention
-  unified_pos: false # Whether to use unified positional embeddings
-  ref: 8            # Reference dimension for unified pos
-  structured_shape: null
-  use_te: false     # Use transformer engine
-  time_input: false # Whether to use time embeddings
-  plus: false
-
-
-# Data configuration
-data:
-  train:
-    data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/train/
-  val:
-    data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/val/
-  max_workers: 8
-  normalization_dir: "src/"  # Directory for normalization files
-  preload_depth: 1
-  pin_memory: true
-  resolution: 300_000
-  mode: surface
-  # Preprocessing switches:
-  # (Changing thes will change the embedding dim)
-  include_normals: true
-  include_sdf: false
-  translational_invariance: true
-  scale_invariance: true
-  reference_scale: [12.0, 4.5, 3.25]
-  data_keys:
-    - "surface_fields"
-    - "surface_mesh_centers"
-    - "surface_normals"
-    - "surface_areas"
-    - "air_density"
-    - "stream_velocity"
-    - "stl_faces"
-    - "stl_centers"
-    - "stl_coordinates"
-  include_geometry: false
-  broadcast_global_features: true
-  return_mesh_features: false
-
 # Logging configuration
 logging:
   level: INFO
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/transolver_volume.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/transolver_volume.yaml
index 04a907c1b4..3cc0ba5c27 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/transolver_volume.yaml
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/transolver_volume.yaml
@@ -14,7 +14,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
+defaults:
+  - training: base
+  - model: transolver
+  - datapipe: volume
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
@@ -25,75 +28,9 @@ precision: bfloat16 # float32, float16, bfloat16, or float8
 compile: true
 profile: false
 
-# Training configuration
-training:
-  num_epochs: 501 # Add one to save at 250
-  save_interval: 25  # Save checkpoint every N epochs
-
-  # StepLR scheduler: Decays the learning rate by gamma every step_size epochs
-  scheduler:
-    name: "StepLR"
-    params:
-      step_size: 100    # Decay every 200 epochs (set X as desired)
-      gamma: 0.5        # Decay factor
-
-  # Optimizer configuration
-  optimizer:
-    _target_: torch.optim.AdamW
-    lr: 1.0e-3
-    weight_decay: 1.0e-4
-    betas: [0.9, 0.999]
-    eps: 1.0e-8
-
-# Model configuration
 model:
-  _target_: physicsnemo.models.transolver.Transolver
-  functional_dim: 2 # Input feature dimension
-  out_dim: 5        # Output feature dimension
-  embedding_dim: 7  # Spatial embedding dimension
-  n_layers: 8       # Number of transformer layers
-  n_hidden: 256     # Hidden dimension
-  dropout: 0.0      # Dropout rate
-  n_head: 8         # Number of attention heads
-  act: "gelu"       # Activation function
-  mlp_ratio: 2      # MLP ratio in attention blocks
-  slice_num: 512     # Number of slices in physics attention
-  unified_pos: false # Whether to use unified positional embeddings
-  ref: 8            # Reference dimension for unified pos
-  structured_shape: null
-  use_te: false     # Use transformer engine
-  time_input: false # Whether to use time embeddings
-  plus: false
-
-
-# Data configuration
-data:
-  train:
-    data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/train/
-  val:
-    data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/val/
-  max_workers: 8
-  normalization_dir: "src/"  # Directory for normalization files
-  preload_depth: 1
-  volume_sample_from_disk: true # Enable faster IO on pre-shuffled volumetric data
-  pin_memory: true
-  resolution: 300_000
-  # Preprocessing switches:
-  # (Changing thes will change the embedding dim)
-  include_normals: true
-  include_sdf: true
-  translational_invariance: true
-  scale_invariance: true
-  reference_scale: [12.0, 4.5, 3.25]
-  mode: volume
-  data_keys:
-    - "volume_fields"
-    - "volume_mesh_centers"
-    - "air_density"
-    - "stream_velocity"
-    - "stl_faces"
-    - "stl_centers"
-    - "stl_coordinates"
+  out_dim: 5
+  embedding_dim: 7
 
 # Logging configuration
 logging:
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_combined.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_combined.yaml
new file mode 100644
index 0000000000..ca6feeec5e
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_combined.yaml
@@ -0,0 +1,47 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+defaults:
+  - training: base
+  - model: typhon
+  - datapipe: combined
+
+output_dir: "runs"
+checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
+run_id: "typhon/combined/bfloat16"
+
+model:
+  functional_dim: 
+   - 7 
+   - 7
+  out_dim: 
+   - 4
+   - 5
+
+# Performance considerations:
+precision: float32 # float32, float16, bfloat16, or float8
+compile: false
+profile: false
+
+data.include_geometry: true
+data.geometry_sampling: 300_000
+data.broadcast_global_features: false
+
+
+# Logging configuration
+logging:
+  level: INFO
+  format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml
index 57713b7ac2..23e2e6b0e1 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml
@@ -14,7 +14,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
+defaults:
+  - training: base
+  - model: typhon
+  - datapipe: surface
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
@@ -25,79 +28,11 @@ precision: bfloat16 # float32, float16, bfloat16, or float8
 compile: true
 profile: false
 
-# Training configuration
-training:
-
-  num_epochs: 501 # Add one to save at 250
-  save_interval: 25  # Save checkpoint every N epochs
-
-  # StepLR scheduler: Decays the learning rate by gamma every step_size epochs
-  scheduler:
-    name: "StepLR"
-    params:
-      step_size: 100    # Decay every 200 epochs (set X as desired)
-      gamma: 0.5        # Decay factor
-
-
-  # Optimizer configuration
-  optimizer:
-    _target_: torch.optim.AdamW
-    lr: 1.0e-3
-    weight_decay: 1.0e-4
-    betas: [0.9, 0.999]
-    eps: 1.0e-8
-
-# Model configuration
-model:
-  _target_: physicsnemo.experimental.models.typhon.Typhon
-  functional_dim: 6 # Input feature dimension
-  global_dim: 2
-  geometry_dim: 3
-  out_dim: 4        # Output feature dimension
-  n_layers: 8       # Number of transformer layers
-  n_hidden: 256     # Hidden dimension
-  dropout: 0.0      # Dropout rate
-  n_head: 8         # Number of attention heads
-  act: "gelu"       # Activation function
-  mlp_ratio: 2     # MLP ratio in attention blocks
-  slice_num: 512     # Number of slices in physics attention
-  use_te: false     # Use transformer engine
-  plus: false
 
+data.include_geometry: true
+data.geometry_sampling: 300_000
+data.broadcast_global_features: false
 
-# Data configuration
-data:
-  train:
-    data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/train/
-  val:
-    data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/val/
-  max_workers: 8
-  normalization_dir: "src/"  # Directory for normalization files
-  preload_depth: 1
-  pin_memory: true
-  resolution: 200_000
-  mode: surface
-  # Preprocessing switches:
-  # (Changing thes will change the embedding dim)
-  data_keys:
-    - "surface_fields"
-    - "surface_areas"
-    - "surface_mesh_centers"
-    - "surface_normals"
-    - "air_density"
-    - "stream_velocity"
-    - "stl_faces"
-    - "stl_centers"
-    - "stl_coordinates"
-  include_geometry: true
-  include_normals: true
-  include_sdf: false
-  translational_invariance: true
-  scale_invariance: true
-  reference_scale: [12.0, 4.5, 3.25]
-  geometry_sampling: 300_000
-  broadcast_global_features: false
-  return_mesh_features: False
 
 # Logging configuration
 logging:
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml
index a4dd02408a..1f47ce5511 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml
@@ -14,7 +14,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
+defaults:
+  - training: base
+  - model: typhon
+  - datapipe: volume
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
@@ -25,78 +28,15 @@ precision: bfloat16 # float32, float16, bfloat16, or float8
 compile: false
 profile: false
 
-# Training configuration
-training:
-  num_epochs: 501 # Add one to save at 250
-  save_interval: 25  # Save checkpoint every N epochs
-
-  # StepLR scheduler: Decays the learning rate by gamma every step_size epochs
-  scheduler:
-    name: "StepLR"
-    params:
-      step_size: 100    # Decay every 200 epochs (set X as desired)
-      gamma: 0.5        # Decay factor
-
-
-  # Optimizer configuration
-  optimizer:
-    _target_: torch.optim.AdamW
-    lr: 1.0e-3
-    weight_decay: 1.0e-4
-    betas: [0.9, 0.999]
-    eps: 1.0e-8
+data.include_geometry: true
+data.geometry_sampling: 300_000
+data.broadcast_global_features: false
 
 
-# Model configuration
 model:
-  _target_: physicsnemo.experimental.models.typhon.Typhon
-  functional_dim: 7 # Input feature dimension
-  global_dim: 2
-  geometry_dim: 3
-  out_dim: 5        # Output feature dimension
-  n_layers: 6       # Number of transformer layers
-  n_hidden: 256     # Hidden dimension
-  dropout: 0.0      # Dropout rate
-  n_head: 8         # Number of attention heads
-  act: "gelu"       # Activation function
-  mlp_ratio: 2      # MLP ratio in attention blocks
-  slice_num: 512     # Number of slices in physics attention
-  use_te: false     # Use transformer engine
-  plus: false
-
-
-
-# Data configuration
-data:
-  train:
-    data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/train/
-  val:
-    data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/val/
-  max_workers: 8
-  normalization_dir: "src/"  # Directory for normalization files
-  preload_depth: 1
-  volume_sample_from_disk: true # Enable faster IO on pre-shuffled volumetric data
-  pin_memory: true
-  resolution: 100_000
-  # Preprocessing switches:
-  # (Changing these will change the embedding dim)
-  include_geometry: true
-  include_normals: true
-  include_sdf: true
-  translational_invariance: true
-  scale_invariance: true
-  reference_scale: [12.0, 4.5, 3.25]
-  geometry_sampling: 300_000
-  broadcast_global_features: false
-  mode: volume
-  data_keys:
-    - "volume_fields"
-    - "volume_mesh_centers"
-    - "air_density"
-    - "stream_velocity"
-    - "stl_faces"
-    - "stl_centers"
-    - "stl_coordinates"
+  functional_dim: 7
+  out_dim: 5
+  n_layers: 6
 
 # Logging configuration
 logging:
diff --git a/examples/cfd/external_aerodynamics/transolver/src/train.py b/examples/cfd/external_aerodynamics/transolver/src/train.py
index a7cf2299c6..f7714f4b6c 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/train.py
+++ b/examples/cfd/external_aerodynamics/transolver/src/train.py
@@ -254,13 +254,26 @@ def forward_pass(
     targets = batch["fields"]
 
     # Cast precisions:
-    features, embeddings = cast_precisions(features, embeddings, precision=precision)
+    # features, = cast_precisions(features, precision=precision)
+    # embeddings = cast_precisions(embeddings, precision=precision)
 
     if "geometry" in batch.keys():
         (geometry,) = cast_precisions(batch["geometry"], precision=precision)
     else:
         geometry = None
 
+    all_metrics = {}
+    if datapipe.config.model_type == "combined":
+        modes = ["surface", "volume"]
+    elif datapipe.config.model_type == "surface":
+        modes = [
+            "surface",
+        ]
+    elif datapipe.config.model_type == "volume":
+        modes = [
+            "volume",
+        ]
+
     with get_autocast_context(precision):
         # For fp8, we may have to pad the inputs:
         if precision == "float8" and TE_AVAILABLE:
@@ -273,25 +286,37 @@ def forward_pass(
         else:
             outputs = model(fx=features, embedding=embeddings)
 
-        outputs = unpad_output_for_fp8(outputs, output_pad_size)
+        outputs = [unpad_output_for_fp8(o, output_pad_size) for o in outputs]
+
+        loss = [torch.nn.functional.mse_loss(o, t) for o, t in zip(outputs, targets)]
+        for i, _loss in enumerate(loss):
+            all_metrics[f"loss/{modes[i]}"] = _loss
 
-        loss = torch.nn.functional.mse_loss(outputs, targets)
+        full_loss = torch.sum(torch.stack(loss))
 
     air_density = batch["air_density"] if "air_density" in batch.keys() else None
     stream_velocity = (
         batch["stream_velocity"] if "stream_velocity" in batch.keys() else None
     )
 
-    unscaled_outputs = datapipe.unscale_model_targets(
-        outputs, air_density=air_density, stream_velocity=stream_velocity
-    )
-    unscaled_targets = datapipe.unscale_model_targets(
-        targets, air_density=air_density, stream_velocity=stream_velocity
-    )
+    for i in range(len(outputs)):
+        unscaled_outputs = datapipe.unscale_model_targets(
+            outputs[i],
+            air_density=air_density,
+            stream_velocity=stream_velocity,
+            factor_type=modes[i],
+        )
+        unscaled_targets = datapipe.unscale_model_targets(
+            targets[i],
+            air_density=air_density,
+            stream_velocity=stream_velocity,
+            factor_type=modes[i],
+        )
 
-    metrics = metrics_fn(unscaled_outputs, unscaled_targets, dist_manager, data_mode)
+        metrics = metrics_fn(unscaled_outputs, unscaled_targets, dist_manager, modes[i])
+        all_metrics.update(metrics)
 
-    return loss, metrics, (unscaled_outputs, unscaled_targets)
+    return full_loss, all_metrics, (unscaled_outputs, unscaled_targets)
 
 
 @profile
@@ -344,7 +369,7 @@ def train_epoch(
             precision,
             output_pad_size,
             dist_manager,
-            cfg.data.mode,
+            cfg.datapipe.mode,
             dataloader,
         )
 
@@ -460,7 +485,7 @@ def val_epoch(
                 precision,
                 output_pad_size,
                 dist_manager,
-                cfg.data.mode,
+                cfg.datapipe.mode,
                 dataloader,
             )
 
@@ -592,7 +617,8 @@ def main(cfg: DictConfig):
     cfg, output_pad_size = update_model_params_for_fp8(cfg, logger)
 
     # Set up model
-    model = hydra.utils.instantiate(cfg.model)
+    # (Using partial convert to get lists, etc., instead of ListConfigs.)
+    model = hydra.utils.instantiate(cfg.model, _convert_="partial")
     logger.info(f"\n{torchinfo.summary(model, verbose=0)}")
 
     model.to(dist_manager.device)
@@ -607,31 +633,42 @@ def main(cfg: DictConfig):
     logger.info(f"Number of parameters: {num_params}")
 
     # Load the normalization file from configured directory (defaults to current dir)
-    norm_dir = getattr(cfg.data, "normalization_dir", ".")
-    if cfg.data.mode == "surface":
+    norm_dir = getattr(cfg.datapipe, "normalization_dir", ".")
+    if cfg.datapipe.mode == "surface" or cfg.datapipe.mode == "combined":
         norm_file = str(Path(norm_dir) / "surface_fields_normalization.npz")
-    elif cfg.data.mode == "volume":
-        norm_file = str(Path(norm_dir) / "volume_fields_normalization.npz")
+        norm_data = np.load(norm_file)
+        surface_factors = {
+            "mean": torch.from_numpy(norm_data["mean"]).to(dist_manager.device),
+            "std": torch.from_numpy(norm_data["std"]).to(dist_manager.device),
+        }
+    else:
+        surface_factors = None
 
-    norm_data = np.load(norm_file)
-    norm_factors = {
-        "mean": torch.from_numpy(norm_data["mean"]).to(dist_manager.device),
-        "std": torch.from_numpy(norm_data["std"]).to(dist_manager.device),
-    }
+    if cfg.datapipe.mode == "volume" or cfg.datapipe.mode == "combined":
+        norm_file = str(Path(norm_dir) / "volume_fields_normalization.npz")
+        norm_data = np.load(norm_file)
+        volume_factors = {
+            "mean": torch.from_numpy(norm_data["mean"]).to(dist_manager.device),
+            "std": torch.from_numpy(norm_data["std"]).to(dist_manager.device),
+        }
+    else:
+        volume_factors = None
 
     # Training dataset
     train_dataloader = create_transolver_dataset(
-        cfg.data,
+        cfg.datapipe,
         phase="train",
-        scaling_factors=norm_factors,
+        surface_factors=surface_factors,
+        volume_factors=volume_factors,
     )
 
     # Validation dataset
 
     val_dataloader = create_transolver_dataset(
-        cfg.data,
+        cfg.datapipe,
         phase="val",
-        scaling_factors=norm_factors,
+        surface_factors=surface_factors,
+        volume_factors=volume_factors,
     )
 
     num_replicas = dist_manager.world_size

From 025b843db78b46c97ab7cbbe8cbf7366880a1240 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Tue, 2 Dec 2025 10:26:44 -0800
Subject: [PATCH 05/32] Enable surface / volume combined training.

---
 .../src/conf/transolver_surface.yaml          |   5 +-
 .../transolver/src/conf/typhon_combined.yaml  |   7 +-
 .../transolver/src/conf/typhon_surface.yaml   |  14 +-
 .../transolver/src/conf/typhon_volume.yaml    |   9 +-
 .../transolver/src/metrics.py                 |   3 +
 .../transolver/src/train.py                   | 141 ++++++++++++++----
 .../transolver/src/utils.py                   | 102 +++++++++++++
 .../datapipes/cae/transolver_datapipe.py      |  12 --
 8 files changed, 238 insertions(+), 55 deletions(-)
 create mode 100644 examples/cfd/external_aerodynamics/transolver/src/utils.py

diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/transolver_surface.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/transolver_surface.yaml
index 797e004e86..e45951a310 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/transolver_surface.yaml
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/transolver_surface.yaml
@@ -24,10 +24,13 @@ checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output
 run_id: "surface/bfloat16"
 
 # Performance considerations:
-precision: bfloat16 # float32, float16, bfloat16, or float8
+precision: float32 # float32, float16, bfloat16, or float8
 compile: true
 profile: false
 
+datapipe:
+  include_sdf: false
+
 # Logging configuration
 logging:
   level: INFO
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_combined.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_combined.yaml
index ca6feeec5e..fbefbd7246 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_combined.yaml
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_combined.yaml
@@ -36,9 +36,10 @@ precision: float32 # float32, float16, bfloat16, or float8
 compile: false
 profile: false
 
-data.include_geometry: true
-data.geometry_sampling: 300_000
-data.broadcast_global_features: false
+datapipe:
+  include_geometry: true
+  geometry_sampling: 300_000
+  broadcast_global_features: false
 
 
 # Logging configuration
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml
index 23e2e6b0e1..4bc9d6d147 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml
@@ -24,14 +24,18 @@ checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output
 run_id: "typhon/surface/bfloat16"
 
 # Performance considerations:
-precision: bfloat16 # float32, float16, bfloat16, or float8
-compile: true
+precision: float32 # float32, float16, bfloat16, or float8
+compile: false
 profile: false
 
+model:
+  functional_dim: 6
 
-data.include_geometry: true
-data.geometry_sampling: 300_000
-data.broadcast_global_features: false
+datapipe:
+  include_sdf: false
+  include_geometry: true
+  geometry_sampling: 300_000
+  broadcast_global_features: false
 
 
 # Logging configuration
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml
index 1f47ce5511..a51849b84e 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml
@@ -24,13 +24,14 @@ checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output
 run_id: "volumeX/fake-name"
 
 # Performance considerations:
-precision: bfloat16 # float32, float16, bfloat16, or float8
+precision: float32 # float32, float16, bfloat16, or float8
 compile: false
 profile: false
 
-data.include_geometry: true
-data.geometry_sampling: 300_000
-data.broadcast_global_features: false
+datapipe:
+  include_geometry: true
+  geometry_sampling: 300_000
+  broadcast_global_features: false
 
 
 model:
diff --git a/examples/cfd/external_aerodynamics/transolver/src/metrics.py b/examples/cfd/external_aerodynamics/transolver/src/metrics.py
index 143e4fa338..34dbb74aff 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/metrics.py
+++ b/examples/cfd/external_aerodynamics/transolver/src/metrics.py
@@ -19,6 +19,8 @@
 from physicsnemo.distributed import ShardTensor
 from physicsnemo.distributed import DistributedManager
 
+from utils import tensorwise
+
 
 def all_reduce_dict(
     metrics: dict[str, torch.Tensor], dm: DistributedManager
@@ -49,6 +51,7 @@ def all_reduce_dict(
     return metrics
 
 
+@tensorwise
 def metrics_fn(
     pred: torch.Tensor,
     target: torch.Tensor,
diff --git a/examples/cfd/external_aerodynamics/transolver/src/train.py b/examples/cfd/external_aerodynamics/transolver/src/train.py
index f7714f4b6c..823c31f0e1 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/train.py
+++ b/examples/cfd/external_aerodynamics/transolver/src/train.py
@@ -22,6 +22,8 @@
 import collections
 from contextlib import nullcontext
 
+from collections.abc import Sequence
+
 # Configuration:
 import hydra
 import omegaconf
@@ -59,6 +61,11 @@
     downsample_surface,
 )
 
+# tensorwise handles both single-point-cloud and multi-point-cloud runs.
+# It's a decorator that will automatically unzip one or more lists of tensors,
+# run the function, and rezip the results.
+from utils import tensorwise
+
 # Special import, if transformer engine is available:
 from physicsnemo.core.version_check import check_version_spec
 
@@ -166,9 +173,12 @@ def get_autocast_context(precision: str) -> nullcontext:
         return nullcontext()
 
 
-def cast_precisions(*tensors: torch.Tensor, precision: str) -> list[torch.Tensor]:
+@tensorwise
+def cast_precisions(tensor: torch.Tensor, precision: str) -> torch.Tensor:
     """
     Casts the tensors to the specified precision.
+
+    We are careful to take either a tensor or list of tensors, and return the same format.
     """
 
     match precision:
@@ -180,11 +190,12 @@ def cast_precisions(*tensors: torch.Tensor, precision: str) -> list[torch.Tensor
             dtype = None
 
     if dtype is not None:
-        tensors = [t.to(dtype) for t in tensors]
-
-    return tensors
+        return tensor.to(dtype)
+    else:
+        return tensor
 
 
+@tensorwise
 def pad_input_for_fp8(
     features: torch.Tensor,
     embeddings: torch.Tensor,
@@ -217,6 +228,7 @@ def pad_input_for_fp8(
     return features, geometry
 
 
+@tensorwise
 def unpad_output_for_fp8(
     outputs: torch.Tensor, output_pad_size: int | None
 ) -> torch.Tensor:
@@ -236,6 +248,14 @@ def unpad_output_for_fp8(
     return outputs
 
 
+@tensorwise
+def loss_fn(outputs: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
+    """
+    Compute the loss for the model.
+    """
+    return torch.nn.functional.mse_loss(outputs, targets)
+
+
 def forward_pass(
     batch: dict,
     model: torch.nn.Module,
@@ -247,6 +267,12 @@ def forward_pass(
 ):
     """
     Run the forward pass of the model for one batch, including metrics and loss calculation.
+
+    Transolver takes a single tensor each for features and embeddings.
+    Typhon takes a list of tensors for each.
+
+    Typhon needs a `geometry` tensor, so that's the switch we use to distinguish.
+
     """
 
     features = batch["fx"]
@@ -254,16 +280,17 @@ def forward_pass(
     targets = batch["fields"]
 
     # Cast precisions:
-    # features, = cast_precisions(features, precision=precision)
-    # embeddings = cast_precisions(embeddings, precision=precision)
-
+    features = cast_precisions(features, precision=precision)
+    embeddings = cast_precisions(embeddings, precision=precision)
     if "geometry" in batch.keys():
-        (geometry,) = cast_precisions(batch["geometry"], precision=precision)
+        geometry = cast_precisions(batch["geometry"], precision=precision)
     else:
         geometry = None
 
     all_metrics = {}
     if datapipe.config.model_type == "combined":
+        # This is hard coded for Typhon.  If you have more point clouds,
+        # your mileage may vary.
         modes = ["surface", "volume"]
     elif datapipe.config.model_type == "surface":
         modes = [
@@ -280,41 +307,95 @@ def forward_pass(
             features, geometry = pad_input_for_fp8(features, embeddings, geometry)
 
         if "geometry" in batch.keys():
+            # This is the Typhon path
             outputs = model(
                 global_embedding=features, local_embedding=embeddings, geometry=geometry
             )
-        else:
-            outputs = model(fx=features, embedding=embeddings)
 
-        outputs = [unpad_output_for_fp8(o, output_pad_size) for o in outputs]
+            outputs = unpad_output_for_fp8(outputs, output_pad_size)
+            # Loss per point cloud:
+            loss = loss_fn(outputs, targets)
+            # Log them too:
+            for i, mode in enumerate(modes):
+                all_metrics[f"loss/{mode}"] = loss[i]
+            # Averaging over point cloud inputs, instead of summing.
+            full_loss = torch.mean(torch.stack(loss))
 
-        loss = [torch.nn.functional.mse_loss(o, t) for o, t in zip(outputs, targets)]
-        for i, _loss in enumerate(loss):
-            all_metrics[f"loss/{modes[i]}"] = _loss
+        else:
+            # This is the Transolver path
+            outputs = model(fx=features, embedding=embeddings)
+            outputs = unpad_output_for_fp8(outputs, output_pad_size)
+            full_loss = torch.nn.functional.mse_loss(outputs, targets)
 
-        full_loss = torch.sum(torch.stack(loss))
+            all_metrics[f"loss/{modes[0]}"] = full_loss
 
     air_density = batch["air_density"] if "air_density" in batch.keys() else None
     stream_velocity = (
         batch["stream_velocity"] if "stream_velocity" in batch.keys() else None
     )
 
-    for i in range(len(outputs)):
-        unscaled_outputs = datapipe.unscale_model_targets(
-            outputs[i],
-            air_density=air_density,
-            stream_velocity=stream_velocity,
-            factor_type=modes[i],
-        )
-        unscaled_targets = datapipe.unscale_model_targets(
-            targets[i],
-            air_density=air_density,
-            stream_velocity=stream_velocity,
-            factor_type=modes[i],
-        )
+    unscaled_outputs = tensorwise(datapipe.unscale_model_targets)(
+        outputs,
+        air_density=air_density,
+        stream_velocity=stream_velocity,
+        factor_type=modes,
+    )
+    unscaled_targets = tensorwise(datapipe.unscale_model_targets)(
+        targets,
+        air_density=air_density,
+        stream_velocity=stream_velocity,
+        factor_type=modes,
+    )
+    metrics = metrics_fn(unscaled_outputs, unscaled_targets, dist_manager, modes)
 
-        metrics = metrics_fn(unscaled_outputs, unscaled_targets, dist_manager, modes[i])
-        all_metrics.update(metrics)
+    # In the combined mode, this is a list of dicts.  Merge them.
+    metrics = (
+        {k: v for d in metrics for k, v in d.items()}
+        if isinstance(metrics, list)
+        else metrics
+    )
+    all_metrics.update(metrics)
+
+    # if "geometry" in batch.keys():
+    #     print(f"HERE")
+    #     unscaled_outputs = []
+    #     unscaled_targets = []
+    #     for i in range(len(outputs)):
+    #         local_unscaled_outputs = datapipe.unscale_model_targets(
+    #             outputs[i],
+    #             air_density=air_density,
+    #             stream_velocity=stream_velocity,
+    #             factor_type=modes[i],
+    #         )
+    #         local_unscaled_targets = datapipe.unscale_model_targets(
+    #             targets[i],
+    #             air_density=air_density,
+    #             stream_velocity=stream_velocity,
+    #             factor_type=modes[i],
+    #         )
+    #         print(f"local_unscaled_outputs: {local_unscaled_outputs.shape}")
+    #         print(f"local_unscaled_targets: {local_unscaled_targets.shape}")
+    #         metrics = metrics_fn(local_unscaled_outputs, local_unscaled_targets, dist_manager, modes[i])
+    #         print(f"metrics: {metrics}")
+    #         all_metrics.update(metrics)
+    #         unscaled_outputs.append(local_unscaled_outputs)
+    #         unscaled_targets.append(local_unscaled_targets)
+    # else:
+    #     unscaled_outputs = datapipe.unscale_model_targets(
+    #         outputs,
+    #         air_density=air_density,
+    #         stream_velocity=stream_velocity,
+    #         factor_type=modes[0],
+    #     )
+    #     unscaled_targets = datapipe.unscale_model_targets(
+    #         targets,
+    #         air_density=air_density,
+    #         stream_velocity=stream_velocity,
+    #         factor_type=modes[0],
+    #     )
+
+    #     metrics = metrics_fn(unscaled_outputs, unscaled_targets, dist_manager, modes[0])
+    #     all_metrics.update(metrics)
 
     return full_loss, all_metrics, (unscaled_outputs, unscaled_targets)
 
diff --git a/examples/cfd/external_aerodynamics/transolver/src/utils.py b/examples/cfd/external_aerodynamics/transolver/src/utils.py
new file mode 100644
index 0000000000..a5484e9747
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/transolver/src/utils.py
@@ -0,0 +1,102 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from collections.abc import Iterable, Sequence
+import torch
+import functools
+
+_SEQUENCE_BLOCKLIST = (torch.Tensor, str, bytes)
+
+
+def _is_tensor_sequence(x):
+    return isinstance(x, Sequence) and not isinstance(x, _SEQUENCE_BLOCKLIST)
+
+
+def _coerce_iterable(arg):
+    """
+    Normalize iterable inputs so tensorwise can unzip any sequence-like object,
+    even if it is only an iterator (e.g., zip objects of strings or constants).
+    """
+    if _is_tensor_sequence(arg):
+        return arg, True
+    if isinstance(arg, Iterable) and not isinstance(arg, _SEQUENCE_BLOCKLIST):
+        return tuple(arg), True
+    return arg, False
+
+
+def tensorwise(fn):
+    """
+    Decorator: allow fn(tensor, ...) or fn(list-of-tensors, ...).
+    If any argument is a sequence of tensors, apply fn elementwise. Non-sequence
+    iterables (zip objects, generators of strings, etc.) are automatically
+    materialized so they can participate in the elementwise zip as well.
+    All sequences must be the same length.
+    """
+
+    @functools.wraps(fn)
+    def wrapper(*args, **kwargs):
+        # Detect sequences while allowing generic iterables (e.g., zip objects)
+        normalized_args = []
+        seq_flags = []
+        for arg in args:
+            normalized_arg, is_seq = _coerce_iterable(arg)
+            normalized_args.append(normalized_arg)
+            seq_flags.append(is_seq)
+
+        normalized_kwargs = {}
+        kw_seq_flags = {}
+        for key, value in kwargs.items():
+            normalized_value, is_seq = _coerce_iterable(value)
+            normalized_kwargs[key] = normalized_value
+            kw_seq_flags[key] = is_seq
+
+        any_seq = any(seq_flags) or any(kw_seq_flags.values())
+
+        if not any_seq:
+            # Nothing is a sequence — call normally
+            return fn(*normalized_args, **normalized_kwargs)
+
+        # All sequence arguments must be sequences of the same length
+        # Collect all sequences (positional + keyword)
+        seq_lengths = {len(a) for a, flag in zip(normalized_args, seq_flags) if flag}
+        seq_lengths.update(
+            len(normalized_kwargs[k]) for k, flag in kw_seq_flags.items() if flag
+        )
+        lengths = seq_lengths
+        if len(lengths) != 1:
+            raise ValueError(
+                f"Sequence arguments must have same length; got lengths {lengths}."
+            )
+
+        L = lengths.pop()
+
+        outs = []
+        for i in range(L):
+            # Rebuild ith positional args
+            ith_args = [
+                (a[i] if is_s else a) for a, is_s in zip(normalized_args, seq_flags)
+            ]
+            # Rebuild ith keyword args
+            ith_kwargs = {
+                k: (v[i] if kw_seq_flags[k] else v)
+                for k, v in normalized_kwargs.items()
+            }
+            outs.append(fn(*ith_args, **ith_kwargs))
+
+        return outs
+
+    return wrapper
diff --git a/physicsnemo/datapipes/cae/transolver_datapipe.py b/physicsnemo/datapipes/cae/transolver_datapipe.py
index 083d5e98c8..9c6df110a4 100644
--- a/physicsnemo/datapipes/cae/transolver_datapipe.py
+++ b/physicsnemo/datapipes/cae/transolver_datapipe.py
@@ -479,22 +479,10 @@ def process_data(self, data_dict):
             outputs = self.preprocess_surface_data(
                 data_dict, center_of_mass, scale_factor
             )
-            outputs["embeddings"] = [
-                outputs["embeddings"],
-            ]
-            outputs["fields"] = [
-                outputs["fields"],
-            ]
         elif self.config.model_type == "volume":
             outputs = self.preprocess_volume_data(
                 data_dict, center_of_mass, scale_factor
             )
-            outputs["embeddings"] = [
-                outputs["embeddings"],
-            ]
-            outputs["fields"] = [
-                outputs["fields"],
-            ]
         elif self.config.model_type == "combined":
             outputs_surf = self.preprocess_surface_data(
                 data_dict, center_of_mass, scale_factor

From fe60296e3cad4d01f35369255ea4fff0e0a5bac5 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Tue, 2 Dec 2025 10:28:36 -0800
Subject: [PATCH 06/32] deprecate old files

---
 .../transolver/{ => deprecated}/conf/train_surface.yaml           | 0
 .../external_aerodynamics/transolver/{ => deprecated}/datapipe.py | 0
 .../transolver/{ => deprecated}/inference_on_vtp.py               | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename examples/cfd/external_aerodynamics/transolver/{ => deprecated}/conf/train_surface.yaml (100%)
 rename examples/cfd/external_aerodynamics/transolver/{ => deprecated}/datapipe.py (100%)
 rename examples/cfd/external_aerodynamics/transolver/{ => deprecated}/inference_on_vtp.py (100%)

diff --git a/examples/cfd/external_aerodynamics/transolver/conf/train_surface.yaml b/examples/cfd/external_aerodynamics/transolver/deprecated/conf/train_surface.yaml
similarity index 100%
rename from examples/cfd/external_aerodynamics/transolver/conf/train_surface.yaml
rename to examples/cfd/external_aerodynamics/transolver/deprecated/conf/train_surface.yaml
diff --git a/examples/cfd/external_aerodynamics/transolver/datapipe.py b/examples/cfd/external_aerodynamics/transolver/deprecated/datapipe.py
similarity index 100%
rename from examples/cfd/external_aerodynamics/transolver/datapipe.py
rename to examples/cfd/external_aerodynamics/transolver/deprecated/datapipe.py
diff --git a/examples/cfd/external_aerodynamics/transolver/inference_on_vtp.py b/examples/cfd/external_aerodynamics/transolver/deprecated/inference_on_vtp.py
similarity index 100%
rename from examples/cfd/external_aerodynamics/transolver/inference_on_vtp.py
rename to examples/cfd/external_aerodynamics/transolver/deprecated/inference_on_vtp.py

From 341343e12e25a8bd081f52f97ea971fee2b7b9fb Mon Sep 17 00:00:00 2001
From: Rishikesh Ranade <dr.rranade@gmail.com>
Date: Tue, 2 Dec 2025 06:06:23 -0800
Subject: [PATCH 07/32] typhon bq changes

---
 .../transolver/src/conf/model/typhon.yaml     |   1 +
 .../transolver/src/conf/typhon_surface.yaml   |   8 +-
 .../transolver/src/conf/typhon_volume.yaml    |   7 +-
 .../src/surface_fields_normalization.npz      | Bin 1040 -> 1040 bytes
 .../experimental/models/typhon/typhon.py      | 110 +++++++++++++++++-
 5 files changed, 121 insertions(+), 5 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml
index 166cf7bded..268453738b 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml
@@ -28,4 +28,5 @@ mlp_ratio: 2
 slice_num: 512
 use_te: false
 plus: false
+include_local_features: true # use local features
 
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml
index 4bc9d6d147..87551b8202 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml
@@ -21,15 +21,19 @@ defaults:
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
-run_id: "typhon/surface/bfloat16"
+run_id: "typhon/surface/bq"
 
 # Performance considerations:
 precision: float32 # float32, float16, bfloat16, or float8
-compile: false
+compile: true
 profile: false
 
 model:
   functional_dim: 6
+  n_layers: 14
+  radii: [0.05, 0.25] # radius for local features
+  neighbors_in_radius: [8, 32] # neighbors in radius for local features
+  n_hidden_local: 32 # hidden dimension for local features
 
 datapipe:
   include_sdf: false
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml
index a51849b84e..1abdf075cf 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml
@@ -21,7 +21,7 @@ defaults:
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
-run_id: "volumeX/fake-name"
+run_id: "typhon/volume/bq"
 
 # Performance considerations:
 precision: float32 # float32, float16, bfloat16, or float8
@@ -37,7 +37,10 @@ datapipe:
 model:
   functional_dim: 7
   out_dim: 5
-  n_layers: 6
+  n_layers: 14
+  radii: [0.05, 0.25] # radius for local features
+  neighbors_in_radius: [8, 32] # neighbors in radius for local features
+  n_hidden_local: 32 # hidden dimension for local features
 
 # Logging configuration
 logging:
diff --git a/examples/cfd/external_aerodynamics/transolver/src/surface_fields_normalization.npz b/examples/cfd/external_aerodynamics/transolver/src/surface_fields_normalization.npz
index b6809d416c9ba267131897355c52b9c8a5e3dee3..6245438261a7409cdc0b7dff35451f17aed62772 100644
GIT binary patch
delta 143
zcmbQhF@ZxUz?+#xmjMD48GahCjGrj9Ldo#C_dd1ij$J!!SD9AtYuSOS%4zCK*@<_S
zD;Wox*!`cj(JD_i-Adfh&I(mk?Y<dylb15CW~nIrzh`m?(?b@J>60y)jaWbiOwMOE
PWC5$mXBL~hi#ZGcx#cW_

delta 143
zcmbQhF@ZxUz?+#xmjMD4878II-J2-1LTSBx&_21xJ-dWsSDW(cwC_MwmGM~3cH*7o
zO2L1P?HK(wT1}ggZdJ3;&I(o4%YT6rCNE`N%_8G6dGF*9riUyb(<fUp8?k^4n4HgS
P$O2ZA&nz~17jqZ@iK{IB

diff --git a/physicsnemo/experimental/models/typhon/typhon.py b/physicsnemo/experimental/models/typhon/typhon.py
index 45af3f573f..8d3a03f4e3 100644
--- a/physicsnemo/experimental/models/typhon/typhon.py
+++ b/physicsnemo/experimental/models/typhon/typhon.py
@@ -28,6 +28,7 @@
     gumbel_softmax,
 )
 from physicsnemo.models.transolver.transolver import MLP
+from physicsnemo.models.layers import BQWarp, fourier_encode, Mlp
 
 from physicsnemo.models.meta import ModelMetaData
 from physicsnemo.models.module import Module
@@ -575,6 +576,64 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 
         return slice_tokens
 
+class GeoConvOut(nn.Module):
+    """
+    Geometry layer to project STL geometry data onto regular grids.
+    """
+
+    def __init__(
+        self,
+        input_features: int,
+        neighbors_in_radius: int,
+        base_neurons: int,
+    ):
+        """
+        Initialize the GeoConvOut layer.
+
+        Args:
+            input_features: Number of input feature dimensions
+            neighbors_in_radius: Number of neighbors in radius
+        """
+        super().__init__()
+        self.base_neurons = base_neurons
+
+        input_features_calculated = input_features * neighbors_in_radius
+
+        self.mlp = Mlp(
+            in_features=input_features_calculated,
+            hidden_features=[base_neurons, base_neurons // 2],
+            out_features=base_neurons,
+            act_layer=nn.GELU,
+            drop=0.0,
+        )
+
+        self.activation = nn.GELU
+
+        self.neighbors_in_radius = neighbors_in_radius
+
+    def forward(
+        self,
+        x: torch.Tensor,
+    ) -> torch.Tensor:
+        """
+        Flatten each point's neighbor features and encode them with an MLP and tanh.
+
+        Args:
+            x: Input tensor containing coordinates of the neighboring points
+               (batch_size, n_points, n_neighbors, 3)
+            
+        Returns:
+            Processed geometry features of shape (batch_size, n_points, base_neurons)
+        """
+
+        b, n_points, n_neighbors, c = x.shape
+        x = rearrange(
+            x, "b x y z -> b x (y z)", x=n_points, y=n_neighbors, z=c
+        )
+        
+        x = F.tanh(self.mlp(x))
+
+        return x
 
 
 def _normalize_dim(x):
@@ -637,6 +696,14 @@ class Typhon(Module):
         Whether to include time embeddings. Default is False.
     plus : bool, optional
         Whether to use Transolver++ features in the GALE layers. Default is False.
+    include_local_features : bool, optional
+        Whether to include local features in the global context. Default is False.
+    radii : list[float], optional
+        Radii for the local features. Default is [0.05, 0.25].
+    neighbors_in_radius : list[int], optional
+        Neighbors in radius for the local features. Default is [8, 32].
+    n_hidden_local : int, optional
+        Hidden dimension for the local features. Default is 512.
 
     Raises
     ------
@@ -726,10 +793,16 @@ def __init__(
         use_te: bool = True,
         time_input: bool = False,
         plus: bool = False,
+        include_local_features: bool = False,
+        radii: list[float] = [0.05, 0.25],
+        neighbors_in_radius: list[int] = [8, 32],
+        n_hidden_local: int = 512,
     ) -> None:
         super().__init__(meta=TyphonMetaData())
         self.__name__ = "Typhon"
 
+        self.include_local_features = include_local_features
+
         self.use_te = use_te
         # Check that the hidden dimension and head dimensions are compatible:
         if not n_hidden % n_head == 0:
@@ -740,6 +813,36 @@ def __init__(
         # These are to project geometry embeddings and global embeddings onto
         # a physical state space:
         context_dim = 0
+        if geometry_dim is not None and self.include_local_features:
+            self.radii = radii
+            self.neighbors_in_radius = neighbors_in_radius
+            self.bq_warp = nn.ModuleList()
+            self.geo_conv_out = nn.ModuleList()
+            self.geometry_features_tokenizer = nn.ModuleList()
+
+            for h in range(len(self.radii)):
+                self.bq_warp.append(BQWarp(
+                    radius=radii[h],
+                    neighbors_in_radius=neighbors_in_radius[h],
+                ))
+
+                self.geo_conv_out.append(GeoConvOut(
+                    input_features=geometry_dim,
+                    neighbors_in_radius=neighbors_in_radius[h],
+                    base_neurons=n_hidden_local,
+                ))
+                
+                self.geometry_features_tokenizer.append(ContextProjector(
+                    n_hidden_local,
+                    n_head,
+                    n_hidden // n_head,
+                    dropout,
+                    slice_num,
+                    use_te,
+                    plus,
+                ))
+                context_dim += n_hidden // n_head
+
         if geometry_dim is not None:
             self.geometry_tokenizer = ContextProjector(
                 geometry_dim,
@@ -849,8 +952,13 @@ def forward(
 
         # First, construct the global context vectors:
         global_context_input = []
-
         if geometry is not None:
+            if self.include_local_features:
+                for h in range(len(self.radii)):
+                    mapping, k_short = self.bq_warp[h](geometry, geometry)
+                    geometry_features = self.geo_conv_out[h](k_short)
+                    geometry_states = self.geometry_features_tokenizer[h](geometry_features)
+                    global_context_input.append(geometry_states)
             geometry_states = self.geometry_tokenizer(geometry)
             global_context_input.append(geometry_states)
 

From 561ede5ffa7f64feeff4d88d4c4d5a4a896d40af Mon Sep 17 00:00:00 2001
From: Rishi Ranade <rranade@oci-hsg-cs-001-login-01.cm.cluster>
Date: Thu, 4 Dec 2025 10:27:47 -0800
Subject: [PATCH 08/32] adding bq to combined pipeline (being validated)

---
 .../transolver/src/conf/datapipe/core.yaml    |   2 +-
 .../transolver/src/conf/model/typhon.yaml     |   7 +-
 .../transolver/src/conf/typhon_combined.yaml  |   2 +-
 .../transolver/src/conf/typhon_surface.yaml   |   6 +-
 .../transolver/src/conf/typhon_volume.yaml    |   8 +-
 .../src/surface_fields_normalization.npz      | Bin 1040 -> 1040 bytes
 .../src/volume_fields_normalization.npz       | Bin 0 -> 1056 bytes
 .../datapipes/cae/transolver_datapipe.py      |   5 -
 .../experimental/models/typhon/typhon.py      | 107 ++++++++++++------
 9 files changed, 84 insertions(+), 53 deletions(-)
 create mode 100644 examples/cfd/external_aerodynamics/transolver/src/volume_fields_normalization.npz

diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/core.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/core.yaml
index 83c2b9fa6e..cb972abefe 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/core.yaml
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/core.yaml
@@ -30,7 +30,7 @@ preload_depth: 1
 pin_memory: true
 
 # Sampling resolution of the point clouds:
-resolution: 100_000
+resolution: 50_000
 
 # Surface / Volume / (combined, if supported)
 mode: ???
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml
index 268453738b..38b1fd8feb 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml
@@ -19,14 +19,17 @@ functional_dim: 6
 global_dim: 2
 geometry_dim: 3
 out_dim: 4
-n_layers: 8
+n_layers: 14
 n_hidden: 256
 dropout: 0.0
 n_head: 8
 act: "gelu"
 mlp_ratio: 2
-slice_num: 512
+slice_num: 256
 use_te: false
 plus: false
 include_local_features: true # use local features
+radii: [0.05, 0.25, 1.0, 2.5] # radius for local features
+neighbors_in_radius: [8, 32, 64, 128] # neighbors in radius for local features
+n_hidden_local: 32 # hidden dimension for local features
 
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_combined.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_combined.yaml
index fbefbd7246..8da413783c 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_combined.yaml
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_combined.yaml
@@ -25,7 +25,7 @@ run_id: "typhon/combined/bfloat16"
 
 model:
   functional_dim: 
-   - 7 
+   - 6 
    - 7
   out_dim: 
    - 4
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml
index 87551b8202..50dc1356f2 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml
@@ -30,9 +30,9 @@ profile: false
 
 model:
   functional_dim: 6
-  n_layers: 14
-  radii: [0.05, 0.25] # radius for local features
-  neighbors_in_radius: [8, 32] # neighbors in radius for local features
+  include_local_features: true # use local features
+  radii: [0.05, 0.25, 1.0, 2.5] # radius for local features
+  neighbors_in_radius: [16, 64, 128, 256] # neighbors in radius for local features
   n_hidden_local: 32 # hidden dimension for local features
 
 datapipe:
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml
index 1abdf075cf..37ade392d1 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml
@@ -37,10 +37,10 @@ datapipe:
 model:
   functional_dim: 7
   out_dim: 5
-  n_layers: 14
-  radii: [0.05, 0.25] # radius for local features
-  neighbors_in_radius: [8, 32] # neighbors in radius for local features
-  n_hidden_local: 32 # hidden dimension for local features
+  include_local_features: true # use local features
+  radii: [0.05, 0.25, 1.0, 2.5] # radius for local features
+  neighbors_in_radius: [16, 64, 128, 256] # neighbors in radius for local features
+  n_hidden_local: 64 # hidden dimension for local features
 
 # Logging configuration
 logging:
diff --git a/examples/cfd/external_aerodynamics/transolver/src/surface_fields_normalization.npz b/examples/cfd/external_aerodynamics/transolver/src/surface_fields_normalization.npz
index 6245438261a7409cdc0b7dff35451f17aed62772..228f7550cc4be6993352c81bb56def3b95cd036d 100644
GIT binary patch
delta 218
zcmbQhF@ZxUz?+#xmjMD485T-r+@C14Lg^5n_dX`0j$L{&D@^si)a^i3wVM0D%87TD
zD@ndIwTrjiWOX7c!^+6f-U?OKzxi`_PhQHnT&cg-^pK3p<h_&ltM~e+f7*+xYzve4
z)ya8G%T@XtwH;LR!fm^p<ZORTcw`&k&B&z7jOw<@mdr*hAnPaRGaIsiZOCU9o4kuz
NngwhQi1VIV5diQ>L8brz

delta 218
zcmbQhF@ZxUz?+#xmjMD48GahCjGrj9Ldo#C_dd1ij$J!!SD9AtYuSOS%4zCK*@<_S
zD;Wox*!`cj(JD_i-Adfh&I(mk?Y<dylb13sS8AVg_E1IP|2^|}DeartVzm!d+3td8
zQIqqSmaA9=?s3@l`mAk|bh(|kc&lB2HzSiSGpgGrTQVE5fUKXK&uqv7wjrNcZ1OH<
NX%?_KAkKScMF1FiLzVyl

diff --git a/examples/cfd/external_aerodynamics/transolver/src/volume_fields_normalization.npz b/examples/cfd/external_aerodynamics/transolver/src/volume_fields_normalization.npz
new file mode 100644
index 0000000000000000000000000000000000000000..c1f0e6f463f1a4efe45af83a2eca8e0ff0c53802
GIT binary patch
literal 1056
zcmWIWW@gc4fB;2?4`=kB|Azt&1`&qb)Wkf!yn;$b1_6dCP*pH`vR|lgKqMnW8AG*t
zN@{U(k-C+Fx=osix{iW+T7FSUQDR<veo;y)NZc(kr#KZTUYwCwkP75$nCfWiDAX!Y
z0J!eQGTN7(nz-xi0vj7cC$oKt3j@spP<&G%-PQ>64Li^`#U&|&)iWmrPT4i?^s-Z`
zkhXgjsACt;HWSS=ryjhtM)pi@=Aez4FWg)9UzQO+uuh8c0NW<3gZ%HbW}wDQ<go=h
zD4t2I7_2eFxnIKJ&JhuZg6se7SD%lxKYmKfD!`kONtYQl+kmnK2y;NQ35aT71d*^z
zgsuyeJYl-nK?<Ruf$<!W1x>8znn1|_yQbMd%b<w@T@xs{v1`g;0&0Q;et<VC8^{GL
MKzJ2Mn=*rV05}NonE(I)

literal 0
HcmV?d00001

diff --git a/physicsnemo/datapipes/cae/transolver_datapipe.py b/physicsnemo/datapipes/cae/transolver_datapipe.py
index 9c6df110a4..817c6758d1 100644
--- a/physicsnemo/datapipes/cae/transolver_datapipe.py
+++ b/physicsnemo/datapipes/cae/transolver_datapipe.py
@@ -200,11 +200,6 @@ def preprocess_surface_data(
         # Build the embeddings:
         embeddings_inputs = [positions]
 
-        # Surface SDF is always 0:
-        if self.config.include_sdf:
-            sdf = torch.zeros_like(positions[:, 0:1])
-            embeddings_inputs.append(sdf)
-
         if self.config.include_normals:
             normals = data_dict["surface_normals"]
             if idx is not None:
diff --git a/physicsnemo/experimental/models/typhon/typhon.py b/physicsnemo/experimental/models/typhon/typhon.py
index 8d3a03f4e3..8936bc1a57 100644
--- a/physicsnemo/experimental/models/typhon/typhon.py
+++ b/physicsnemo/experimental/models/typhon/typhon.py
@@ -20,6 +20,7 @@
 import torch
 import torch.nn as nn
 from einops import rearrange
+import torch.nn.functional as F
 
 import physicsnemo  # noqa: F401 for docs
 from physicsnemo.utils.version_check import check_min_version
@@ -796,7 +797,7 @@ def __init__(
         include_local_features: bool = False,
         radii: list[float] = [0.05, 0.25],
         neighbors_in_radius: list[int] = [8, 32],
-        n_hidden_local: int = 512,
+        n_hidden_local: int = 32,
     ) -> None:
         super().__init__(meta=TyphonMetaData())
         self.__name__ = "Typhon"
@@ -810,38 +811,60 @@ def __init__(
                 f"Typhon requires n_hidden % n_head == 0, but instead got {n_hidden % n_head}"
             )
 
+        functional_dims = _normalize_dim(functional_dim)
+        out_dims = _normalize_dim(out_dim)
+
         # These are to project geometry embeddings and global embeddings onto
         # a physical state space:
         context_dim = 0
         if geometry_dim is not None and self.include_local_features:
             self.radii = radii
             self.neighbors_in_radius = neighbors_in_radius
+
             self.bq_warp = nn.ModuleList()
+            self.geo_conv_in = nn.ModuleList()
             self.geo_conv_out = nn.ModuleList()
             self.geometry_features_tokenizer = nn.ModuleList()
 
-            for h in range(len(self.radii)):
-                self.bq_warp.append(BQWarp(
-                    radius=radii[h],
-                    neighbors_in_radius=neighbors_in_radius[h],
-                ))
-
-                self.geo_conv_out.append(GeoConvOut(
-                    input_features=geometry_dim,
-                    neighbors_in_radius=neighbors_in_radius[h],
-                    base_neurons=n_hidden_local,
-                ))
-                
-                self.geometry_features_tokenizer.append(ContextProjector(
-                    n_hidden_local,
-                    n_head,
-                    n_hidden // n_head,
-                    dropout,
-                    slice_num,
-                    use_te,
-                    plus,
-                ))
-                context_dim += n_hidden // n_head
+            for i in range(len(functional_dims)):
+                self.bq_warp_list = nn.ModuleList()
+                self.geo_conv_in_list = nn.ModuleList()
+                self.geo_conv_out_list = nn.ModuleList()
+                self.geometry_features_tokenizer_list = nn.ModuleList()
+
+                for h in range(len(self.radii)):
+                    self.bq_warp_list.append(BQWarp(
+                        radius=radii[h],
+                        neighbors_in_radius=neighbors_in_radius[h],
+                    ))
+
+                    self.geo_conv_in_list.append(GeoConvOut(
+                        input_features=geometry_dim,
+                        neighbors_in_radius=neighbors_in_radius[h],
+                        base_neurons=n_hidden_local,
+                    ))
+
+                    self.geo_conv_out_list.append(GeoConvOut(
+                        input_features=geometry_dim,
+                        neighbors_in_radius=neighbors_in_radius[h],
+                        base_neurons=n_hidden_local,
+                    ))
+                    
+                    self.geometry_features_tokenizer_list.append(ContextProjector(
+                        n_hidden_local,
+                        n_head,
+                        n_hidden // n_head,
+                        dropout,
+                        slice_num,
+                        use_te,
+                        plus,
+                    ))
+                    context_dim += n_hidden // n_head
+
+                self.bq_warp.append(nn.ModuleList(self.bq_warp_list))
+                self.geo_conv_in.append(nn.ModuleList(self.geo_conv_in_list))
+                self.geo_conv_out.append(nn.ModuleList(self.geo_conv_out_list))
+                self.geometry_features_tokenizer.append(nn.ModuleList(self.geometry_features_tokenizer_list))
 
         if geometry_dim is not None:
             self.geometry_tokenizer = ContextProjector(
@@ -859,9 +882,6 @@ def __init__(
                 global_dim, n_head, n_hidden // n_head, dropout, slice_num, use_te, plus
             )
             context_dim += n_hidden // n_head
-
-        functional_dims = _normalize_dim(functional_dim)
-        out_dims = _normalize_dim(out_dim)
       
         if len(functional_dims) != len(out_dims):
             raise ValueError(
@@ -886,14 +906,13 @@ def __init__(
             ]
         )
 
-
         self.n_hidden = n_hidden
 
         self.blocks = nn.ModuleList(
             [
                 GALE_block(
                     num_heads=n_head,
-                    hidden_dim=n_hidden,
+                    hidden_dim=n_hidden + n_hidden_local * len(self.radii) if self.include_local_features else n_hidden,
                     dropout=dropout,
                     act=act,
                     mlp_ratio=mlp_ratio,
@@ -911,7 +930,7 @@ def __init__(
             self.ln_mlp_out = nn.ModuleList(
                 [
                     te.LayerNormLinear(
-                        in_features=n_hidden, out_features=o
+                        in_features=n_hidden + n_hidden_local * len(self.radii) if self.include_local_features else n_hidden, out_features=o
                     ) for o in out_dims
                  ]
             )
@@ -919,8 +938,8 @@ def __init__(
             self.ln_mlp_out = nn.ModuleList(
                 [
                     nn.Sequential(
-                        nn.LayerNorm(n_hidden),
-                        nn.Linear(n_hidden, o),
+                        nn.LayerNorm(n_hidden + n_hidden_local * len(self.radii) if self.include_local_features else n_hidden),
+                        nn.Linear(n_hidden + n_hidden_local * len(self.radii) if self.include_local_features else n_hidden, o),
                     )
                     for o in out_dims
                 ]
@@ -954,14 +973,15 @@ def forward(
         global_context_input = []
         if geometry is not None:
             if self.include_local_features:
-                for h in range(len(self.radii)):
-                    mapping, k_short = self.bq_warp[h](geometry, geometry)
-                    geometry_features = self.geo_conv_out[h](k_short)
-                    geometry_states = self.geometry_features_tokenizer[h](geometry_features)
-                    global_context_input.append(geometry_states)
+                for i in range(len(local_embedding)):
+                    for h in range(len(self.radii)):
+                        mapping, k_short = self.bq_warp[i][h](local_embedding[i][:, :, :3], geometry)
+                        geometry_features = self.geo_conv_in[i][h](k_short)
+                        geometry_states = self.geometry_features_tokenizer[i][h](geometry_features)
+                        global_context_input.append(geometry_states)
             geometry_states = self.geometry_tokenizer(geometry)
             global_context_input.append(geometry_states)
-
+        
         if global_embedding is not None:
             global_states = self.global_tokenizer(global_embedding)
             global_context_input.append(global_states)
@@ -970,11 +990,24 @@ def forward(
         if len(global_context_input) > 0:
             embedding_states = torch.cat(global_context_input, dim=-1)
 
+        if self.include_local_features and geometry is not None:
+            local_embedding_bq = []
+            for i in range(len(local_embedding)):
+                local_embedding_list_radii = []
+                for h in range(len(self.radii)):
+                    mapping, k_short = self.bq_warp[i][h](geometry, local_embedding[i][:, :, :3])
+                    local_features = self.geo_conv_out[i][h](k_short)
+                    local_embedding_list_radii.append(local_features)
+                local_embedding_bq.append(torch.cat(local_embedding_list_radii, dim=-1))
+
         local_embedding = _normalize_tensor(local_embedding)
 
         # Project the inputs to the hidden dimension:
         x = [ self.preprocess[i](le) for i, le in enumerate(local_embedding) ]
 
+        if self.include_local_features:
+            x = [torch.cat([x[i], local_embedding_bq[i]], dim=-1) for i in range(len(x))]
+
         for block in self.blocks:
             x = block(x, embedding_states)
 

From 9575c863859e90ccc554bced6052a95ab639e18b Mon Sep 17 00:00:00 2001
From: Rishi Ranade <rranade@oci-hsg-cs-001-login-01.cm.cluster>
Date: Sat, 6 Dec 2025 12:29:38 -0800
Subject: [PATCH 09/32] updating typhon model, removing combined and new typhon
 example

---
 .../external_aerodynamics/typhon/README.md    | 284 ++++++
 .../typhon/requirements.txt                   |   9 +
 .../typhon/src/benchmark_dataloading.py       | 166 ++++
 .../typhon/src/compute_normalizations.py      | 159 ++++
 .../typhon/src/conf/datapipe/combined.yaml    |  35 +
 .../typhon/src/conf/datapipe/core.yaml        |  60 ++
 .../typhon/src/conf/datapipe/surface.yaml     |  33 +
 .../typhon/src/conf/datapipe/volume.yaml      |  31 +
 .../typhon/src/conf/model/transolver.yaml     |  34 +
 .../typhon/src/conf/model/typhon.yaml         |  35 +
 .../typhon/src/conf/training/base.yaml        |  32 +
 .../typhon/src/conf/typhon_surface.yaml       |  48 +
 .../typhon/src/conf/typhon_volume.yaml        |  48 +
 .../typhon/src/inference_on_zarr.py           | 518 ++++++++++
 .../typhon/src/metrics.py                     | 164 ++++
 .../typhon/src/preprocess.py                  | 121 +++
 .../src/surface_fields_normalization.npz      | Bin 0 -> 1040 bytes
 .../external_aerodynamics/typhon/src/train.py | 900 ++++++++++++++++++
 .../external_aerodynamics/typhon/src/utils.py | 102 ++
 .../src/volume_fields_normalization.npz       | Bin 0 -> 1056 bytes
 .../experimental/models/typhon/typhon.py      | 120 +--
 21 files changed, 2818 insertions(+), 81 deletions(-)
 create mode 100644 examples/cfd/external_aerodynamics/typhon/README.md
 create mode 100644 examples/cfd/external_aerodynamics/typhon/requirements.txt
 create mode 100644 examples/cfd/external_aerodynamics/typhon/src/benchmark_dataloading.py
 create mode 100644 examples/cfd/external_aerodynamics/typhon/src/compute_normalizations.py
 create mode 100644 examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/combined.yaml
 create mode 100644 examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/core.yaml
 create mode 100644 examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/surface.yaml
 create mode 100644 examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/volume.yaml
 create mode 100644 examples/cfd/external_aerodynamics/typhon/src/conf/model/transolver.yaml
 create mode 100644 examples/cfd/external_aerodynamics/typhon/src/conf/model/typhon.yaml
 create mode 100644 examples/cfd/external_aerodynamics/typhon/src/conf/training/base.yaml
 create mode 100644 examples/cfd/external_aerodynamics/typhon/src/conf/typhon_surface.yaml
 create mode 100644 examples/cfd/external_aerodynamics/typhon/src/conf/typhon_volume.yaml
 create mode 100644 examples/cfd/external_aerodynamics/typhon/src/inference_on_zarr.py
 create mode 100644 examples/cfd/external_aerodynamics/typhon/src/metrics.py
 create mode 100644 examples/cfd/external_aerodynamics/typhon/src/preprocess.py
 create mode 100644 examples/cfd/external_aerodynamics/typhon/src/surface_fields_normalization.npz
 create mode 100644 examples/cfd/external_aerodynamics/typhon/src/train.py
 create mode 100644 examples/cfd/external_aerodynamics/typhon/src/utils.py
 create mode 100644 examples/cfd/external_aerodynamics/typhon/src/volume_fields_normalization.npz

diff --git a/examples/cfd/external_aerodynamics/typhon/README.md b/examples/cfd/external_aerodynamics/typhon/README.md
new file mode 100644
index 0000000000..d6dd7046d8
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/typhon/README.md
@@ -0,0 +1,284 @@
+<!-- markdownlint-disable -->
+# `Transolver` for External Aerodynamics on Irregular Meshes
+
+This example is an end to end training recipe for the `Transolver` model, which can
+be run on surface or volume data.
+
+`Transolver` is a high-performance surrogate model for CFD solvers. The Transolver model
+adapts the Attention mechanism, encouraging the learning of meaningful representations.
+In each PhysicsAttention layer, input points are projected onto state vectors through
+learnable transformations and weights. These transformations are then used to compute
+self-attention among all state vectors, and the same weights are reused to project
+states back to each input point.
+
+## External Aerodynamics CFD Example: Overview
+
+This directory contains the essential components for training and evaluating a
+model tailored to external aerodynamics CFD problems built on `Transolver`.
+
+By stacking multiple PhysicsAttention layers, the `Transolver` model learns to map from
+the functional input space to the output space with high fidelity. The PhysicsNeMo
+implementation closely follows the original Transolver architecture
+([https://github.com/thuml/Transolver](https://github.com/thuml/Transolver)), but
+introduces modifications for improved numerical stability and compatibility with NVIDIA
+TransformerEngine.
+
+The training example for Transolver uses the [DrivaerML dataset](https://caemldatasets.org/drivaerml/).
+
+As a concrete example, we are training external aerodynamics surrogate models for automobiles.
+`Transolver` takes as input a point cloud on the surface or surrounding the surface,
+iteratively processing it with PhysicsAttention to produce high-fidelity predictions.
+
+## Requirements
+
+Transolver can use TransformerEngine from NVIDIA, as well as tensorstore (for IO),
+zarr, einops and a few other python packages.  Install them with `pip install -r requirements.txt`
+as well as physicsnemo 25.11 or higher.
+
+## Using Transolver for External Aerodynamics
+
+1. Prepare the Dataset.  Transolver uses the same Zarr outputs as other models with DrivaerML.
+`PhysicsNeMo` has a related project to help with data processing, called [PhysicsNeMo-Curator](https://github.com/NVIDIA/physicsnemo-curator).
+Using `PhysicsNeMo-Curator`, the data needed to train can be setup easily.
+Please refer to [these instructions on getting started](https://github.com/NVIDIA/physicsnemo-curator?tab=readme-ov-file#what-is-physicsnemo-curator)
+with `PhysicsNeMo-Curator`.  For specifics of preparing the dataset for this example,
+see the [download](https://github.com/NVIDIA/physicsnemo-curator/blob/main/examples/external_aerodynamics/domino/README.md#download-drivaerml-dataset)
+and [preprocessing](https://github.com/NVIDIA/physicsnemo-curator/blob/main/examples/external_aerodynamics/domino/README.md)
+instructions from `physicsnemo-curator`.  Users should apply the
+preprocessing steps locally to produce `zarr` output files.
+
+2. Train your model.  The model and training settings are configured with
+`hydra`, and two configurations are available: `typhon_surface` and `typhon_volume`.
+Find configurations in `src/conf`, where you can control both network properties
+and training properties. See below for an overview and explanation of key
+parameters that may be of special interest.
+
+3. Use the trained model to perform inference.  This example currently contains
+one inference example: inference on the validation set, already in
+Zarr format.  The `.vtp` inference pipeline is being updated to accommodate Transolver.
+
+The following sections contain further details on the training and inference
+recipe.
+
+## Model Training
+
+To train the model, we first compute normalization factors on the dataset so
+that the predicted quantities fall in a well-defined range. The included
+script, `compute_normalizations.py`, will compute the normalization
+factors.  Once run, it should save to an output file similar to
+"surface_fields_normalization.npz".  This will get loaded during training.
+The normalization file location can be configured via `data.normalization_dir`
+in the training configuration (defaults to current directory).
+
+> By default, the normalization sets the mean to 0.0 and std to 1.0 of all labels
+> in the dataset, computing the mean across the train dataset.  You could adapt
+> this to a different normalization; however, take care to update both the
+> preprocessing as well as inference scripts.  Min/Max is another popular strategy.
+
+To configure your training run, use `hydra`.  The
+config contains sections for the model, data, optimizer, and training settings.
+For details on the model parameters, see the API for `physicsnemo.models.transolver`.
+
+To fit the training into memory, you can apply on-the-fly downsampling to the data
+with `data.resolution=N`, where `N` is how many points per GPU to use.  This dataloader
+will yield the full data examples in shapes of `[1, K, f]` where `K` is the resolution
+of the mesh, and `f` is the feature space (3 for points, normals, etc.  4 for surface
+fields).  Downsampling happens in the preprocessing pipeline.
+
+During training, the configuration uses a flat learning rate that decays every 100
+epochs, and bfloat16 format by default.  The scheduler and learning rate
+may be configured.  
+
+The Optimizer for this training is the `Muon` optimizer - available only in
+`pytorch>=2.9.0`.  While not strictly required, we have found the `muon` optimizer
+performs substantially better on these architectures than standard `AdamW` and
+a oneCycle schedule.
+
+### Training Precision
+
+Transolver, as a transformer-like architecture, has support for NVIDIA's
+[TransformerEngine](https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/index.html)
+built in.  You can enable/disable the transformer engine path in the model with
+`model.use_te=[True | False]`.  Available precisions for training with `transformer_engine`
+are `training.precision=["float32" | "float16" | "bfloat16" | "float8" ]`.  In `float8`
+precision, the TransformerEngine Hybrid recipe is used for casting weights and inputs
+in the forward and backwards passes.  For more details on `float8` precision, see
+the fp8 guide from
+[TransformerEngine](https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/examples/fp8_primer.html).
+When using fp8, the training script will automatically pad and unpad the input and output,
+respectively, to use the fp8 hardware correctly.
+
+> **Float8** precisions are only available on GPUs with fp8 tensorcore support, such
+> as Hopper, Blackwell, Ada Lovelace, and others.
+
+### Other Configuration Settings
+
+Several other important configuration settings are available:
+
+- `checkpoint_dir` sets the directory for saving model checkpoints (defaults to `output_dir`
+if not specified), allowing separation of checkpoints from other outputs.
+- `compile` will use `torch.compile` for optimized performance.  It is not
+compatible with `transformer_engine` (`model.use_te=True`).  If TransformerEngine is
+not used, and half precision is, `torch.compile` is recommended for improved performance.
+- `training.num_epochs` controls the total number of epochs used during training.
+- `training.save_interval` will dictate how often the model weights and training
+tools are checkpointed.
+
+> **Note** Like other parameters of the model, changing the value of `model.use_te`
+> will make checkpoints incompatible.
+
+The training script supports data-parallel training via PyTorch DDP.  In a future
+update, we may enable domain parallelism via FSDP and ShardTensor.
+
+The script can be launched on a single GPU with, for example,
+
+```bash
+python train.py --config-name typhon_surface
+```
+
+or, for multi-GPU training, use `torchrun` or other distributed job launch tools.
+
+Example output for one epoch of the script, in an 8 GPU run, looks like:
+
+```default
+[2025-07-17 14:27:36,040][training][INFO] - Epoch 47 [0/54] Loss: 0.117565 Duration: 0.78s
+[2025-07-17 14:27:36,548][training][INFO] - Epoch 47 [1/54] Loss: 0.109625 Duration: 0.51s
+[2025-07-17 14:27:37,048][training][INFO] - Epoch 47 [2/54] Loss: 0.122574 Duration: 0.50s
+[2025-07-17 14:27:37,556][training][INFO] - Epoch 47 [3/54] Loss: 0.125667 Duration: 0.51s
+[2025-07-17 14:27:38,063][training][INFO] - Epoch 47 [4/54] Loss: 0.101863 Duration: 0.51s
+[2025-07-17 14:27:38,547][training][INFO] - Epoch 47 [5/54] Loss: 0.113324 Duration: 0.48s
+[2025-07-17 14:27:39,054][training][INFO] - Epoch 47 [6/54] Loss: 0.115478 Duration: 0.51s
+...[removed for brevity]...
+[2025-07-17 14:28:00,662][training][INFO] - Epoch 47 [49/54] Loss: 0.107935 Duration: 0.49s
+[2025-07-17 14:28:01,178][training][INFO] - Epoch 47 [50/54] Loss: 0.100087 Duration: 0.52s
+[2025-07-17 14:28:01,723][training][INFO] - Epoch 47 [51/54] Loss: 0.097733 Duration: 0.55s
+[2025-07-17 14:28:02,194][training][INFO] - Epoch 47 [52/54] Loss: 0.116489 Duration: 0.47s
+[2025-07-17 14:28:02,605][training][INFO] - Epoch 47 [53/54] Loss: 0.104865 Duration: 0.41s
+
+Epoch 47 Average Metrics:
++-------------+---------------------+
+|   Metric    |    Average Value    |
++-------------+---------------------+
+| l2_pressure | 0.20262257754802704 |
+| l2_shear_x  | 0.2623567283153534  |
+| l2_shear_y  | 0.35603201389312744 |
+| l2_shear_z  | 0.38965049386024475 |
++-------------+---------------------+
+
+[2025-07-17 14:28:02,834][training][INFO] - Val [0/6] Loss: 0.114801 Duration: 0.22s
+[2025-07-17 14:28:03,074][training][INFO] - Val [1/6] Loss: 0.111632 Duration: 0.24s
+[2025-07-17 14:28:03,309][training][INFO] - Val [2/6] Loss: 0.105342 Duration: 0.23s
+[2025-07-17 14:28:03,537][training][INFO] - Val [3/6] Loss: 0.111033 Duration: 0.23s
+[2025-07-17 14:28:03,735][training][INFO] - Val [4/6] Loss: 0.099963 Duration: 0.20s
+[2025-07-17 14:28:03,903][training][INFO] - Val [5/6] Loss: 0.092340 Duration: 0.17s
+
+Epoch 47 Validation Average Metrics:
++-------------+---------------------+
+|   Metric    |    Average Value    |
++-------------+---------------------+
+| l2_pressure | 0.19346082210540771 |
+| l2_shear_x  | 0.26041051745414734 |
+| l2_shear_y  | 0.3589216470718384  |
+| l2_shear_z  |  0.370105117559433  |
++-------------+---------------------+
+```
+
+## Dataset Inference
+
+The validation dataset in Zarr format can be loaded, processed, and the L2
+metrics summarized in `inference_on_zarr.py`.  For surface data, this script will also
+compute the drag and lift coefficients and the R^2 correlation of the predictions.
+
+To run inference on surface data, it's necessary to add a line to your launch command:
+
+```
+python src/inference_on_zarr.py --config-name typhon_surface run_id=/path/to/model/
+
+```
+
+The `data.return_mesh_features` flag can also be set in the config file.  It is
+disabled for training but necessary for inference.  The model path should be
+the folder containing your saved checkpoints.
+
+
+To ensure correct calculation of drag and lift, and accurate overall metrics,
+the inference script will chunk a full-resolution training example into batches,
+and stitch the outputs together at the end.  Output will appear as a table
+with all metrics for that mode, for example:
+
+```
+|   Batch |   Loss |   L2 Pressure |   L2 Shear X |   L2 Shear Y |   L2 Shear Z |   Predicted Drag Coefficient |   Pred Lift Coefficient |   True Drag Coefficient |   True Lift Coefficient |   Elapsed (s) |
+|---------|--------|---------------|--------------|--------------|--------------|------------------------------|-------------------------|-------------------------|-------------------------|---------------|
+|       0 | 0.0188 |        0.0491 |       0.0799 |       0.1023 |       0.1174 |                      488.075 |                140.365  |                 475.534 |                135.944  |        8.1281 |
+|       1 | 0.0144 |        0.045  |       0.0659 |       0.0955 |       0.107  |                      404.472 |                 21.8897 |                 406.484 |                 35.6202 |        0.7348 |
+|       2 | 0.0239 |        0.0505 |       0.0835 |       0.1101 |       0.1592 |                      383.219 |                 41.973  |                 373.999 |                 43.7198 |        1.6722 |
+|       3 | 0.0255 |        0.0526 |       0.088  |       0.1151 |       0.1305 |                      576.671 |                230.185  |                 579.655 |                210.01   |        1.4369 |
+|       4 | 0.0214 |        0.0498 |       0.0849 |       0.109  |       0.1229 |                      451.478 |                -45.3076 |                 447.109 |                -36.7298 |        1.8973 |
+|       5 | 0.0147 |        0.0402 |       0.0671 |       0.0923 |       0.0992 |                      419.76  |                -87.7945 |                 424.63  |                -83.8417 |        1.7255 |
+|       6 | 0.0171 |        0.0463 |       0.0742 |       0.1016 |       0.126  |                      350.877 |                -32.1908 |                 338.721 |                -25.5008 |        1.3738 |
+|       7 | 0.0248 |        0.0596 |       0.0989 |       0.123  |       0.1299 |                      420.122 |                -42.3073 |                 420.772 |                -16.9301 |        1.9126 |
+|       8 | 0.0178 |        0.0453 |       0.0736 |       0.1021 |       0.118  |                      380.704 |                -90.6937 |                 374.134 |                -87.2395 |        1.8081 |
+|       9 | 0.0297 |        0.0629 |       0.1004 |       0.1245 |       0.1418 |                      400.315 |               -149.927  |                 396.178 |               -147.33   |        1.6693 |
+|      10 | 0.0303 |        0.0674 |       0.0978 |       0.1233 |       0.1455 |                      602.585 |                249.985  |                 588.987 |                237.999  |        1.6581 |
+|      11 | 0.0188 |        0.0514 |       0.0772 |       0.1006 |       0.1114 |                      593.366 |                155.859  |                 590.833 |                167.067  |        1.6914 |
+|      12 | 0.0147 |        0.0436 |       0.0681 |       0.0929 |       0.1009 |                      457.252 |                 77.7093 |                 449.866 |                 77.2836 |        1.734  |
+|      13 | 0.0226 |        0.0529 |       0.0902 |       0.1092 |       0.1319 |                      374.561 |                -88.923  |                 372.675 |               -101.469  |        1.3918 |
+|      14 | 0.0186 |        0.0591 |       0.0758 |       0.1056 |       0.1199 |                      516.445 |                275.197  |                 512.238 |                274.633  |        1.7587 |
+|      15 | 0.0145 |        0.0443 |       0.0691 |       0.0974 |       0.1083 |                      397.664 |                 44.4129 |                 395.376 |                 31.417  |        1.6531 |
+|      16 | 0.019  |        0.0502 |       0.0828 |       0.1028 |       0.1145 |                      502.079 |                 75.96   |                 501.056 |                 77.4457 |        1.6815 |
+|      17 | 0.0155 |        0.0459 |       0.0721 |       0.1003 |       0.1064 |                      472.191 |                138.568  |                 460.808 |                139.42   |        1.7288 |
+|      18 | 0.0186 |        0.0549 |       0.0783 |       0.1074 |       0.1162 |                      482.58  |                 37.7236 |                 482.344 |                 37.2805 |        1.7915 |
+|      19 | 0.0148 |        0.0425 |       0.078  |       0.1004 |       0.113  |                      448.504 |                157.548  |                 446.845 |                173.68   |        1.8042 |
+|      20 | 0.0144 |        0.0424 |       0.072  |       0.0946 |       0.0993 |                      500.781 |                 81.4317 |                 490.024 |                 85.8991 |        1.7812 |
+|      21 | 0.0142 |        0.0462 |       0.0669 |       0.0983 |       0.0982 |                      483.057 |                134.258  |                 473.958 |                121.551  |        1.8255 |
+|      22 | 0.0149 |        0.0432 |       0.0671 |       0.0964 |       0.1004 |                      510.518 |                162.651  |                 504.159 |                164.953  |        1.8021 |
+|      23 | 0.0182 |        0.05   |       0.074  |       0.101  |       0.116  |                      388.014 |               -223.932  |                 393.797 |               -229.571  |        2.6297 |
+|      24 | 0.0188 |        0.0486 |       0.0774 |       0.1049 |       0.1064 |                      477.557 |                -11.9395 |                 494.446 |                  7.5967 |        0.8668 |
+|      25 | 0.0229 |        0.0608 |       0.0867 |       0.1211 |       0.1507 |                      348.804 |                  5.3412 |                 341.955 |                 30.8778 |        1.5065 |
+|      26 | 0.019  |        0.0544 |       0.0814 |       0.1063 |       0.119  |                      467.791 |                170.149  |                 466.67  |                186.732  |        1.8434 |
+|      27 | 0.0154 |        0.047  |       0.0734 |       0.1014 |       0.1102 |                      426.202 |                -78.8968 |                 417.572 |                -78.867  |        1.8177 |
+|      28 | 0.0159 |        0.0455 |       0.0724 |       0.0983 |       0.1051 |                      523.8   |                165.693  |                 512.567 |                150.064  |        1.7851 |
+|      29 | 0.0243 |        0.0498 |       0.0873 |       0.112  |       0.1309 |                      481.491 |                 55.202  |                 483.593 |                 59.5569 |        1.7285 |
+|      30 | 0.021  |        0.054  |       0.0808 |       0.1097 |       0.1232 |                      508.089 |                200.01   |                 496.295 |                194.816  |        1.7602 |
+|      31 | 0.0186 |        0.0479 |       0.0771 |       0.1047 |       0.1351 |                      422.298 |                 80.0045 |                 421.175 |                 97.6633 |        1.532  |
+|      32 | 0.0205 |        0.0589 |       0.0793 |       0.1129 |       0.1308 |                      395.582 |                -12.36   |                 400.106 |                  6.3091 |        1.5378 |
+|      33 | 0.0129 |        0.0396 |       0.0679 |       0.0923 |       0.0953 |                      431.082 |                  7.8286 |                 428.801 |                  8.6182 |        1.8789 |
+|      34 | 0.0144 |        0.0412 |       0.0662 |       0.0893 |       0.0979 |                      530.599 |                179.193  |                 532.033 |                158.92   |        1.8429 |
+|      35 | 0.0139 |        0.0424 |       0.0716 |       0.0945 |       0.1006 |                      430.982 |                  7.3476 |                 428.805 |                 -4.3425 |        1.711  |
+|      36 | 0.0167 |        0.043  |       0.0702 |       0.0975 |       0.1217 |                      381.859 |                -45.0215 |                 376.432 |                -65.0582 |        1.4227 |
+|      37 | 0.021  |        0.0516 |       0.0772 |       0.1106 |       0.1302 |                      348.402 |                -84.0741 |                 347.672 |                -69.1513 |        1.5184 |
+|      38 | 0.029  |        0.0585 |       0.0895 |       0.1188 |       0.1347 |                      596.764 |                287.068  |                 586.433 |                236.509  |        1.6109 |
+|      39 | 0.0176 |        0.0472 |       0.0758 |       0.1006 |       0.1115 |                      470.259 |                 25.2451 |                 468.965 |                 38.1292 |        1.7815 |
+|      40 | 0.0309 |        0.0583 |       0.0827 |       0.1163 |       0.1649 |                      579.514 |                186.451  |                 587.644 |                177.782  |        1.6365 |
+|      41 | 0.0188 |        0.0516 |       0.0776 |       0.1084 |       0.1369 |                      349.04  |               -106.107  |                 341.44  |                -94.3054 |        1.4013 |
+|      42 | 0.014  |        0.0424 |       0.0673 |       0.0964 |       0.0977 |                      477.916 |                120.4    |                 474.075 |                116.718  |        1.8973 |
+|      43 | 0.0171 |        0.0476 |       0.071  |       0.1054 |       0.1116 |                      423.233 |                 50.4327 |                 420.448 |                 69.2674 |        1.8893 |
+|      44 | 0.0247 |        0.0613 |       0.0799 |       0.1171 |       0.141  |                      426.292 |                 -2.5913 |                 422.69  |                 20.4068 |        1.4871 |
+|      45 | 0.0161 |        0.0431 |       0.0736 |       0.0959 |       0.1007 |                      538.835 |                 71.1159 |                 544.14  |                 89.5933 |        1.7929 |
+|      46 | 0.017  |        0.0442 |       0.0722 |       0.0986 |       0.1175 |                      361.974 |               -136.836  |                 359.692 |               -151.266  |        1.4659 |
+|      47 | 0.0186 |        0.046  |       0.0778 |       0.1076 |       0.1114 |                      502.144 |                 80.8261 |                 499.45  |                102.07   |        1.9431 |
+[2025-12-01 08:19:42,350][training][INFO] - R2 score for lift: 0.9824
+[2025-12-01 08:19:42,350][training][INFO] - R2 score for drag: 0.9904
+[2025-12-01 08:19:42,351][training][INFO] - Summary:
+| Batch   |   Loss |   L2 Pressure |   L2 Shear X |   L2 Shear Y |   L2 Shear Z |   Predicted Drag Coefficient |   Pred Lift Coefficient |   True Drag Coefficient |   True Lift Coefficient |   Elapsed (s) |
+|---------|--------|---------------|--------------|--------------|--------------|------------------------------|-------------------------|-------------------------|-------------------------|---------------|
+| Mean    | 0.0191 |        0.0496 |       0.0775 |       0.1047 |       0.1191 |                      456.371 |                 51.6484 |                 453.193 |                  53.624 |        1.8114 |
+```
+
+  <!-- Alternatively, the model can be used
+directly on `.vtp` or `.stl` files as shown in `inference_on_vtp.py`.  Note that the
+script contains several parameters from the DrivaerML dataset as hardcoded variable
+names: `CpMeanTrim`, `pMeanTrim`, `wallShearStressMeanTrim`, which are used to
+compute the L2 metrics on the inference outputs. -->
+
+<!-- In `inference_on_zarr.py`, the dataset examples are downsampled and preprocessed
+exactly as in the training script.  In `inference_on_vtp.py`, however, the entire
+mesh is processed.  To enable the mesh to fit into GPU memory, the mesh is chunked
+into pieces that are then processed, and recombined to form the prediction on the
+entire mesh.  The outputs are then saved to .vtp files for downstream analysis. -->
+
+## Transolver++
+
+Transolver++ is supported with the `plus` flag to the model.  In
+our experiments, we did not see gains, but you are welcome to try it and share
+your results with us on GitHub!
diff --git a/examples/cfd/external_aerodynamics/typhon/requirements.txt b/examples/cfd/external_aerodynamics/typhon/requirements.txt
new file mode 100644
index 0000000000..ffc351ec7b
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/typhon/requirements.txt
@@ -0,0 +1,9 @@
+hydra-core
+tabulate
+tensorboard
+termcolor
+torchinfo
+einops
+transformer_engine[pytorch]
+tensorstore
+zarr>=3.0
diff --git a/examples/cfd/external_aerodynamics/typhon/src/benchmark_dataloading.py b/examples/cfd/external_aerodynamics/typhon/src/benchmark_dataloading.py
new file mode 100644
index 0000000000..fe58c3a240
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/typhon/src/benchmark_dataloading.py
@@ -0,0 +1,166 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This is a standalone script for benchmarking and testing the Transolver
+datapipe in surface or volume mode.
+"""
+
+from pathlib import Path
+
+import time
+import os
+import re
+import torch
+
+import numpy as np
+
+from typing import Literal, Any
+
+
+import hydra
+from omegaconf import DictConfig, OmegaConf
+
+
+import torch.distributed as dist
+from torch.utils.data.distributed import DistributedSampler
+
+
+from physicsnemo.distributed import DistributedManager
+from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
+
+from physicsnemo.datapipes.cae.transolver_datapipe import (
+    create_transolver_dataset,
+)
+
+
+from physicsnemo.utils.profiling import profile, Profiler
+
+
+@profile
+def main(cfg: DictConfig):
+    """Benchmark one epoch of the training dataloader and log its duration.
+
+    Args:
+        cfg: Hydra configuration object
+
+    Raises:
+        ValueError: If ``cfg.data.mode`` is neither "surface" nor "volume".
+    """
+    DistributedManager.initialize()
+    # Set up distributed training
+    dist_manager = DistributedManager()
+
+    # Set up logging
+    logger = RankZeroLoggingWrapper(PythonLogger(name="training"), dist_manager)
+    logger.info(f"Config:\n{OmegaConf.to_yaml(cfg, resolve=True)}")
+
+    # Load the normalization file:
+    norm_dir = getattr(cfg.data, "normalization_dir", ".")
+    if cfg.data.mode == "surface":
+        norm_file = str(Path(norm_dir) / "surface_fields_normalization.npz")
+    elif cfg.data.mode == "volume":
+        norm_file = str(Path(norm_dir) / "volume_fields_normalization.npz")
+    else:
+        raise ValueError(f"Unsupported data mode: {cfg.data.mode!r}")
+
+    norm_data = np.load(norm_file)
+    norm_factors = {
+        "mean": torch.from_numpy(norm_data["mean"]).to(dist_manager.device),
+        "std": torch.from_numpy(norm_data["std"]).to(dist_manager.device),
+    }
+    # Training dataset
+    train_dataloader = create_transolver_dataset(
+        cfg.data,
+        phase="train",
+        scaling_factors=norm_factors,
+    )
+
+    # Validation dataset
+    val_dataloader = create_transolver_dataset(
+        cfg.data,
+        phase="val",
+        scaling_factors=norm_factors,
+    )
+
+    num_replicas = dist_manager.world_size
+    data_rank = dist_manager.rank
+
+    # Set up distributed samplers
+    train_sampler = torch.utils.data.distributed.DistributedSampler(
+        train_dataloader,
+        num_replicas=num_replicas,
+        rank=data_rank,
+        shuffle=True,
+        drop_last=True,
+    )
+
+    val_sampler = torch.utils.data.distributed.DistributedSampler(
+        val_dataloader,
+        num_replicas=num_replicas,
+        rank=data_rank,
+        shuffle=False,  # No shuffling for validation
+        drop_last=True,
+    )
+
+    # Benchmark loop: a single pass over the training dataloader
+    logger.info("Starting IO benchmark...")
+    for epoch in range(1):
+        # Set the epoch in the samplers
+        train_sampler.set_epoch(epoch)
+        val_sampler.set_epoch(epoch)
+        train_dataloader.dataset.set_indices(list(train_sampler))
+        val_dataloader.dataset.set_indices(list(val_sampler))
+
+        start_time = time.time()
+        # Time each batch fetch; `start` is reset after every batch
+        start = time.time()
+        with Profiler():
+            for i_batch, data in enumerate(train_dataloader):
+                print(f"Train {i_batch} elapsed time: {time.time() - start}")
+                start = time.time()
+
+        end_time = time.time()
+        train_duration = end_time - start_time
+
+        # Log epoch results
+        logger.info(
+            f"Epoch [{epoch}/{cfg.training.num_epochs}] [duration: {train_duration:.2f}s]"
+        )
+
+    logger.info("Benchmark completed!")
+
+
+@hydra.main(version_base=None, config_path="conf", config_name="train_surface")
+def launch(cfg: DictConfig):
+    """Launch the dataloading benchmark with hydra configuration
+
+    Args:
+        cfg: Hydra configuration object
+    """
+    # NOTE(review): `config_name` is "train_surface"; confirm that config exists in conf/.
+    # If you want to use `line_profiler` or PyTorch's profiler, enable them here.
+
+    profiler = Profiler()
+    if cfg.profile:
+        profiler.enable("torch")
+    profiler.initialize()
+    main(cfg)
+    profiler.finalize()
+
+
+if __name__ == "__main__":
+    launch()
diff --git a/examples/cfd/external_aerodynamics/typhon/src/compute_normalizations.py b/examples/cfd/external_aerodynamics/typhon/src/compute_normalizations.py
new file mode 100644
index 0000000000..749a7ab2f7
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/typhon/src/compute_normalizations.py
@@ -0,0 +1,159 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+"""
+This file provides utilities to compute normalization statistics (mean, std, min, max)
+for a given field in a dataset, typically used for preprocessing in CFD workflows.
+"""
+
+from pathlib import Path
+import time
+
+import numpy as np
+import torch
+import hydra
+from omegaconf import DictConfig
+
+from physicsnemo.datapipes.cae.cae_dataset import CAEDataset
+
+
+def compute_mean_std_min_max(
+    dataset: CAEDataset,
+    field_key: str,
+    max_samples: int = 100,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    """
+    Compute the per-channel mean, standard deviation, minimum, and maximum of a
+    field over at most the first ``max_samples`` samples of a dataset.
+
+    Uses a numerically stable online algorithm for mean and variance.
+
+    Args:
+        dataset (CAEDataset): The dataset to process.
+        field_key (str): The key for the field to normalize.
+        max_samples (int): Maximum number of samples to read from ``dataset``.
+
+    Returns:
+        tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+            mean, std (unbiased), min, max tensors for the field.
+
+    Raises:
+        ValueError: If no samples are read (empty dataset or ``max_samples <= 0``).
+    """
+    N = 0  # Total number of rows (entries along dim 0) merged so far
+    mean = None
+    M2 = None  # Sum of squares of differences from the current mean
+    min_val = None
+    max_val = None
+
+    time_start = time.time()
+    for i in range(min(len(dataset), max_samples)):
+        print(f"reading file: {i}")
+        data = dataset[i][field_key]
+        if mean is None:
+            # Initialize accumulators based on the shape of the data
+            mean = torch.zeros(data.shape[-1], device=data.device)
+            M2 = torch.zeros(data.shape[-1], device=data.device)
+            min_val = torch.full((data.shape[-1],), float("inf"), device=data.device)
+            max_val = torch.full((data.shape[-1],), float("-inf"), device=data.device)
+
+        # Compute batch statistics
+        batch_mean = data.mean(axis=(0,))
+        batch_M2 = ((data - batch_mean) ** 2).sum(axis=(0,))
+        batch_n = data.shape[0]
+
+        # Update min/max
+        batch_min = data.amin(dim=(0,))
+        batch_max = data.amax(dim=(0,))
+        min_val = torch.minimum(min_val, batch_min)
+        max_val = torch.maximum(max_val, batch_max)
+
+        # Chan et al. parallel merge; the cross term uses N from before this batch
+        delta = batch_mean - mean
+        M2 = M2 + batch_M2 + delta**2 * (N * batch_n / (N + batch_n))
+        N += batch_n
+        mean = mean + delta * (batch_n / N)
+        time_end = time.time()
+        print(f"Time taken for file {i}: {time_end - time_start:.2f} seconds")
+        time_start = time.time()
+
+    if mean is None:
+        raise ValueError("No samples read; cannot compute statistics.")
+    var = M2 / max(N - 1, 1)  # unbiased; clamp avoids 0/0 for a single row
+    std = torch.sqrt(var)
+    return mean, std, min_val, max_val
+
+
+@hydra.main(version_base="1.3", config_path="conf", config_name="train_surface")
+def main(cfg: DictConfig) -> None:
+    """
+    Script entry point for computing normalization statistics for a specified field
+    in a dataset, using configuration from a YAML file.
+
+    The computed statistics are printed and saved to a .npz file.
+    """
+
+    # Field to normalize, derived from the data mode: "<mode>_fields"
+    field_key: str = cfg.data.mode + "_fields"
+
+    # Normalization directory can be configured (backward compatible: defaults to current directory)
+    normalization_dir: str = getattr(cfg.data, "normalization_dir", ".")
+
+    # Output file is "<normalization_dir>/<field_key>_normalization.npz"
+    workspace_path: str = str(
+        Path(normalization_dir) / f"{field_key}_normalization.npz"
+    )
+
+    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+
+    # Build the dataset over the training data path, reading only `field_key`
+    dataset = CAEDataset(
+        data_dir=cfg.data.train.data_path,
+        keys_to_read=[
+            field_key,
+        ],
+        keys_to_read_if_available={},
+        output_device=device,
+        preload_depth=cfg.data.preload_depth,
+        pin_memory=cfg.data.pin_memory,
+    )
+    # Compute normalization statistics (the sample cap of 100 is passed explicitly)
+    mean, std, min_val, max_val = compute_mean_std_min_max(dataset, field_key, 100)
+    print(f"Mean for {field_key}: {mean}")
+    print(f"Std for {field_key}: {std}")
+    print(f"Min for {field_key}: {min_val}")
+    print(f"Max for {field_key}: {max_val}")
+
+    # Save statistics; NOTE(review): the target directory is not created here
+    print(f"Saving normalization statistics to: {workspace_path}")
+    np.savez(
+        workspace_path,
+        mean=mean.cpu().numpy(),
+        std=std.cpu().numpy(),
+        min=min_val.cpu().numpy(),
+        max=max_val.cpu().numpy(),
+    )
+    print(f"Successfully saved normalization file: {workspace_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/combined.yaml b/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/combined.yaml
new file mode 100644
index 0000000000..e4bcbd16c1
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/combined.yaml
@@ -0,0 +1,35 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# You may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+defaults:
+  -  core
+
+# Overrides for combined data:
+mode: combined
+
+# combined-specific needs:
+data_keys:
+  - "volume_fields"
+  - "volume_mesh_centers"
+  - "surface_fields"
+  - "surface_mesh_centers"
+  - "surface_normals"
+  - "surface_areas"
+  - "air_density"
+  - "stream_velocity"
+  - "stl_faces"
+  - "stl_centers"
+  - "stl_coordinates"
\ No newline at end of file
diff --git a/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/core.yaml b/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/core.yaml
new file mode 100644
index 0000000000..67adcb45f6
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/core.yaml
@@ -0,0 +1,60 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# You may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Paths to your data:
+train:
+  data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/train/
+val:
+  data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/val/
+
+# You can set a normalization factor directory:
+normalization_dir: "src/"
+
+# How many events in advance should we be preloading?
+preload_depth: 1
+
+# Pin memory for GPU transfers?
+pin_memory: true
+
+# Sampling resolution of the point clouds:
+resolution: 200_000
+
+# Surface / Volume / (combined, if supported)
+mode: ???
+
+# For building embeddings: include normal directions for each point?
+include_normals: true
+# Include SDF?  (It's 0 for surface data...)
+include_sdf: true
+# Apply translation invariance via center-of-mass subtraction?
+translational_invariance: true
+# Rescale x/y/z inputs to the model for scale invariance?
+scale_invariance: true
+reference_scale: [12.0, 4.5, 3.25]
+
+# Which parts of the data files to read?  No need to read everything, all the time.
+data_keys: ???
+
+# Load and return the STL geometry info in the dataloader?
+include_geometry: false
+
+# Broadcast global features to the same resolution as points?
+broadcast_global_features: true
+
+# Return the mesh areas and normals?  You don't usually want this for training.
+# We switch it on automatically for inference.
+return_mesh_features: false
+
diff --git a/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/surface.yaml b/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/surface.yaml
new file mode 100644
index 0000000000..8363a7a9dc
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/surface.yaml
@@ -0,0 +1,33 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# You may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+defaults:
+  -  core
+
+# Overrides for surface data:
+mode: surface
+
+# Surface-specific needs:
+data_keys:
+  - "surface_fields"
+  - "surface_mesh_centers"
+  - "surface_normals"
+  - "surface_areas"
+  - "air_density"
+  - "stream_velocity"
+  - "stl_faces"
+  - "stl_centers"
+  - "stl_coordinates"
\ No newline at end of file
diff --git a/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/volume.yaml b/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/volume.yaml
new file mode 100644
index 0000000000..b222fda5f9
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/volume.yaml
@@ -0,0 +1,31 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# You may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+defaults:
+  -  core
+
+# Overrides for volume data:
+mode: volume
+
+# volume-specific needs:
+data_keys:
+  - "volume_fields"
+  - "volume_mesh_centers"
+  - "air_density"
+  - "stream_velocity"
+  - "stl_faces"
+  - "stl_centers"
+  - "stl_coordinates"
\ No newline at end of file
diff --git a/examples/cfd/external_aerodynamics/typhon/src/conf/model/transolver.yaml b/examples/cfd/external_aerodynamics/typhon/src/conf/model/transolver.yaml
new file mode 100644
index 0000000000..c43fb8560c
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/typhon/src/conf/model/transolver.yaml
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+_target_: physicsnemo.models.transolver.Transolver
+functional_dim: 2
+out_dim: 4
+embedding_dim: 6
+n_layers: 8
+n_hidden: 256
+dropout: 0.0
+n_head: 8
+act: "gelu"
+mlp_ratio: 2
+slice_num: 512
+unified_pos: false
+ref: 8
+structured_shape: null
+use_te: false
+time_input: false
+plus: false
+
diff --git a/examples/cfd/external_aerodynamics/typhon/src/conf/model/typhon.yaml b/examples/cfd/external_aerodynamics/typhon/src/conf/model/typhon.yaml
new file mode 100644
index 0000000000..2745a00ab6
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/typhon/src/conf/model/typhon.yaml
@@ -0,0 +1,35 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+_target_: physicsnemo.experimental.models.typhon.Typhon
+functional_dim: 6
+global_dim: 2
+geometry_dim: 3
+out_dim: 4
+n_layers: 14
+n_hidden: 256
+dropout: 0.0
+n_head: 8
+act: "gelu"
+mlp_ratio: 2
+slice_num: 128
+use_te: false
+plus: false
+include_local_features: true # use local features
+radii: [0.05, 0.25, 1.0, 2.5] # radius for local features
+neighbors_in_radius: [8, 32, 64, 128] # neighbors in radius for local features
+n_hidden_local: 32 # hidden dimension for local features
+
diff --git a/examples/cfd/external_aerodynamics/typhon/src/conf/training/base.yaml b/examples/cfd/external_aerodynamics/typhon/src/conf/training/base.yaml
new file mode 100644
index 0000000000..18797ea051
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/typhon/src/conf/training/base.yaml
@@ -0,0 +1,32 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+num_epochs: 501
+save_interval: 25
+
+scheduler:
+  name: "StepLR"
+  params:
+    step_size: 100
+    gamma: 0.5
+
+optimizer:
+  _target_: torch.optim.AdamW
+  lr: 1.0e-3
+  weight_decay: 1.0e-4
+  betas: [0.9, 0.999]
+  eps: 1.0e-8
+
diff --git a/examples/cfd/external_aerodynamics/typhon/src/conf/typhon_surface.yaml b/examples/cfd/external_aerodynamics/typhon/src/conf/typhon_surface.yaml
new file mode 100644
index 0000000000..d76899bd61
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/typhon/src/conf/typhon_surface.yaml
@@ -0,0 +1,48 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+defaults:
+  - training: base
+  - model: typhon
+  - datapipe: surface
+
+output_dir: "runs"
+checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
+run_id: "typhon/surface/bq"
+
+# Performance considerations:
+precision: float32 # float32, float16, bfloat16, or float8
+compile: false
+profile: false
+
+model:
+  functional_dim: 6
+  include_local_features: true # use local features
+  radii: [0.05, 0.25, 1.0, 2.5] # radius for local features
+  neighbors_in_radius: [16, 64, 128, 256] # neighbors in radius for local features
+  n_hidden_local: 32 # hidden dimension for local features
+
+datapipe:
+  include_sdf: false
+  include_geometry: true
+  geometry_sampling: 300_000
+  broadcast_global_features: false
+
+
+# Logging configuration
+logging:
+  level: INFO
+  format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
diff --git a/examples/cfd/external_aerodynamics/typhon/src/conf/typhon_volume.yaml b/examples/cfd/external_aerodynamics/typhon/src/conf/typhon_volume.yaml
new file mode 100644
index 0000000000..aa1a16f983
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/typhon/src/conf/typhon_volume.yaml
@@ -0,0 +1,48 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+defaults:
+  - training: base
+  - model: typhon
+  - datapipe: volume
+
+output_dir: "runs"
+checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
+run_id: "typhon/volume/bq"
+
+# Performance considerations:
+precision: float32 # float32, float16, bfloat16, or float8
+compile: false
+profile: false
+
+datapipe:
+  include_geometry: true
+  geometry_sampling: 300_000
+  broadcast_global_features: false
+
+
+model:
+  functional_dim: 7
+  out_dim: 5
+  include_local_features: true # use local features
+  radii: [0.05, 0.25, 1.0, 2.5] # radius for local features
+  neighbors_in_radius: [8, 32, 64, 128] # neighbors in radius for local features
+  n_hidden_local: 32 # hidden dimension for local features
+
+# Logging configuration
+logging:
+  level: INFO
+  format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
diff --git a/examples/cfd/external_aerodynamics/typhon/src/inference_on_zarr.py b/examples/cfd/external_aerodynamics/typhon/src/inference_on_zarr.py
new file mode 100644
index 0000000000..64f653352f
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/typhon/src/inference_on_zarr.py
@@ -0,0 +1,518 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pathlib import Path
+
+import numpy as np
+import torch
+import torchinfo
+import typing
+import collections
+from typing import Literal
+
+import hydra
+import omegaconf
+from omegaconf import DictConfig
+from physicsnemo.models.transolver.transolver import Transolver
+from physicsnemo.launch.utils import load_checkpoint
+from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
+
+from sklearn.metrics import r2_score
+
+from physicsnemo.distributed import DistributedManager
+
+import time
+
+from physicsnemo.datapipes.cae.transolver_datapipe import (
+    create_transolver_dataset,
+    TransolverDataPipe,
+)
+from train import forward_pass
+from tabulate import tabulate
+
+# import transformer_engine.pytorch as te
+# from transformer_engine.common.recipe import Format, DelayedScaling
+from torch.amp import autocast
+from contextlib import nullcontext
+
+from train import (
+    get_autocast_context,
+    pad_input_for_fp8,
+    unpad_output_for_fp8,
+    update_model_params_for_fp8,
+)
+
+# torch.serialization.add_safe_globals([omegaconf.listconfig.ListConfig])
+# torch.serialization.add_safe_globals([omegaconf.base.ContainerMetadata])
+# torch.serialization.add_safe_globals([typing.Any])
+# torch.serialization.add_safe_globals([list])
+# torch.serialization.add_safe_globals([collections.defaultdict])
+# torch.serialization.add_safe_globals([dict])
+# torch.serialization.add_safe_globals([int])
+# torch.serialization.add_safe_globals([omegaconf.nodes.AnyNode])
+# torch.serialization.add_safe_globals([omegaconf.base.Metadata])
+
+
+@torch.no_grad()
+def compute_force_coefficients(
+    normals: torch.Tensor,
+    area: torch.Tensor,
+    coeff: float,
+    p: torch.Tensor,
+    wss: torch.Tensor,
+    force_direction: torch.Tensor | None = None,
+):
+    """
+    Computes force coefficients for a given mesh. Output includes the pressure and skin
+    friction components. Can be used to compute lift and drag.
+    For drag, use the `force_direction` as the direction of the motion,
+    e.g. [1, 0, 0] for flow in x direction.
+    For lift, use the `force_direction` as the direction perpendicular to the motion,
+    e.g. [0, 1, 0] for flow in x direction and weight in y direction.
+
+    Parameters:
+    -----------
+    normals: torch.Tensor
+        The surface normals on cells of the mesh
+    area: torch.Tensor
+        The surface areas of each cell
+    coeff: float
+        Reciprocal of dynamic pressure times the frontal area, i.e. 2/(A * rho * U^2)
+    p: torch.Tensor
+        Pressure distribution on the mesh (on each cell)
+    wss: torch.Tensor
+        Wall shear stress distribution on the mesh (on each cell)
+    force_direction: torch.Tensor | None
+        Direction to compute the force (array-like); None (default) means [1, 0, 0]
+
+    Returns:
+    --------
+    c_total: torch.Tensor
+        Computed total force coefficient
+    c_p: torch.Tensor
+        Computed pressure force coefficient
+    c_f: torch.Tensor
+        Computed skin friction coefficient
+    """
+    # Build the direction on the inputs' device/dtype (a NumPy default fails on GPU).
+    direction = [1.0, 0.0, 0.0] if force_direction is None else force_direction
+    direction = torch.as_tensor(direction, dtype=normals.dtype, device=normals.device)
+
+    # Pressure and skin-friction contributions along ``direction``
+    c_p = coeff * torch.sum(torch.sum(normals * direction, dim=-1) * area * p)
+    c_f = -coeff * torch.sum(torch.sum(wss * direction, dim=-1) * area)
+
+    return c_p + c_f, c_p, c_f
+
+
+def batched_inference_loop(
+    batch: dict,
+    model: torch.nn.Module,
+    precision: str,
+    data_mode: Literal["surface", "volume"],
+    batch_resolution: int,
+    output_pad_size: int | None,
+    dist_manager: DistributedManager,
+    datapipe: TransolverDataPipe,
+) -> tuple[float, dict, tuple[torch.Tensor, torch.Tensor]]:
+    """Run ``forward_pass`` on random chunks of at most ``batch_resolution`` points.
+
+    Returns size-weighted loss/metrics and (predictions, targets) in input point order.
+    """
+    N = batch["embeddings"].shape[1]
+    # Random ordering of the input points, sliced below into chunks fed to the model.
+    indices = torch.randperm(N, device=batch["fx"].device)
+    index_blocks = torch.split(indices, batch_resolution)
+
+    global_preds_targets = []
+    global_weight = 0.0
+    start = time.time()
+    for i, index_block in enumerate(index_blocks):
+        # We compute the local_batch by slicing from embeddings and fields:
+        local_embeddings = batch["embeddings"][:, index_block]
+        local_fields = batch["fields"][:, index_block]
+
+        # fx is only sliced when the batch carries no "geometry" entry:
+        if "geometry" not in batch.keys():
+            local_fx = batch["fx"][:, index_block]
+        else:
+            local_fx = batch["fx"]
+
+        local_batch = {
+            "fx": local_fx,
+            "embeddings": local_embeddings,
+            "fields": local_fields,
+        }
+
+        if "air_density" in batch.keys() and "stream_velocity" in batch.keys():
+            local_batch["air_density"] = batch["air_density"]
+            local_batch["stream_velocity"] = batch["stream_velocity"]
+
+        if "geometry" in batch.keys():
+            local_batch["geometry"] = batch["geometry"]
+
+        # Run the forward inference pass:
+        local_loss, local_metrics, local_preds_targets = forward_pass(
+            local_batch,
+            model,
+            precision,
+            output_pad_size,
+            dist_manager,
+            data_mode,
+            datapipe,
+        )
+
+        # Accumulate loss and metrics, weighted by this chunk's share of the N points:
+        weight = index_block.shape[0] / N
+        global_weight += weight
+        if i == 0:
+            metrics = {k: local_metrics[k] * weight for k in local_metrics.keys()}
+            loss = local_loss * weight
+        else:
+            metrics = {
+                k: metrics[k] + local_metrics[k] * weight for k in metrics.keys()
+            }
+            loss += local_loss * weight
+
+        global_preds_targets.append(local_preds_targets)
+        end = time.time()
+        elapsed = end - start
+        print(
+            f"Completed sub-batch {i} of {len(index_blocks)} in {elapsed:.4f} seconds"
+        )
+        start = end
+
+    # Normalize the accumulated loss and metrics by the total weight:
+    metrics = {k: v / global_weight for k, v in metrics.items()}
+    loss = loss / global_weight
+
+    global_predictions = torch.cat([l[0] for l in global_preds_targets], dim=1)
+    global_targets = torch.cat([l[1] for l in global_preds_targets], dim=1)
+
+    # Now, we have to *unshuffle* the prediction to the original index
+    inverse_indices = torch.empty_like(indices)
+    inverse_indices[indices] = torch.arange(indices.size(0), device=indices.device)
+    # Predictions/targets are indexed along dim 1, i.e. treated as [batch, N, ...]
+    global_predictions = global_predictions[:, inverse_indices]
+    global_targets = global_targets[:, inverse_indices]
+    return loss, metrics, (global_predictions, global_targets)
+
+
+def inference(cfg: DictConfig) -> None:
+    """
+    Run inference on a validation Zarr dataset using a trained Transolver model.
+
+    Loads the checkpoint and normalization statistics, evaluates every
+    validation sample at full resolution (in chunks of ``cfg.data.resolution``
+    points), and logs per-sample metrics and their means.  For surface data
+    the drag and lift coefficients and their R2 scores are logged as well.
+
+    Note:
+        ``cfg.data`` is modified: ``resolution`` is set to ``None`` and, per
+        mode, ``volume_sample_from_disk`` or ``return_mesh_features`` is set.
+
+    Args:
+        cfg (DictConfig): Hydra configuration object containing model, data, and training settings.
+
+    Returns:
+        None
+
+    Raises:
+        ValueError: If ``cfg.data.mode`` is neither "surface" nor "volume".
+    """
+    DistributedManager.initialize()
+
+    dist_manager = DistributedManager()
+
+    logger = RankZeroLoggingWrapper(PythonLogger(name="training"), dist_manager)
+
+    cfg, output_pad_size = update_model_params_for_fp8(cfg, logger)
+
+    logger.info(f"Config:\n{omegaconf.OmegaConf.to_yaml(cfg, resolve=True)}")
+
+    # Set up model
+    model = hydra.utils.instantiate(cfg.model)
+    logger.info(f"\n{torchinfo.summary(model, verbose=0)}")
+
+    if cfg.checkpoint_dir is not None:
+        checkpoint_dir = cfg.checkpoint_dir
+    else:
+        checkpoint_dir = f"{cfg.output_dir}/{cfg.run_id}/checkpoints"
+
+    ckpt_args = {
+        "path": checkpoint_dir,
+        "models": model,
+    }
+
+    loaded_epoch = load_checkpoint(device=dist_manager.device, **ckpt_args)
+    logger.info(f"loaded epoch: {loaded_epoch}")
+    model.to(dist_manager.device)
+
+    num_params = sum(p.numel() for p in model.parameters())
+    logger.info(f"Number of parameters: {num_params}")
+
+    # Load the normalization file from configured directory (defaults to current dir)
+    norm_dir = getattr(cfg.data, "normalization_dir", ".")
+    if cfg.data.mode == "surface":
+        norm_file = str(Path(norm_dir) / "surface_fields_normalization.npz")
+    elif cfg.data.mode == "volume":
+        norm_file = str(Path(norm_dir) / "volume_fields_normalization.npz")
+    else:
+        # Fail with a clear message instead of a NameError on `norm_file` below.
+        raise ValueError(f"Unknown data mode: {cfg.data.mode}")
+
+    norm_data = np.load(norm_file)
+    norm_factors = {
+        "mean": torch.from_numpy(norm_data["mean"]).to(dist_manager.device),
+        "std": torch.from_numpy(norm_data["std"]).to(dist_manager.device),
+    }
+
+    if cfg.compile:
+        model = torch.compile(model, dynamic=True)
+    model.eval()
+
+    # For INFERENCE, we deliberately set the resolution in the data pipe to NONE
+    # so there is no downsampling.  We still batch it in the inference script
+    # for memory usage constraints.
+
+    batch_resolution = cfg.data.resolution
+    cfg.data.resolution = None
+    ## Make sure to read the whole data sample for volume:
+    if cfg.data.mode == "volume":
+        cfg.data.volume_sample_from_disk = False
+
+    # And we need the mesh features for drag, lift in surface data:
+    if cfg.data.mode == "surface":
+        cfg.data.return_mesh_features = True
+
+    # Validation dataset
+    val_dataset = create_transolver_dataset(
+        cfg.data,
+        phase="val",
+        scaling_factors=norm_factors,
+    )
+
+    # Force directions for the drag and lift coefficients:
+    drag_direction = torch.tensor([[1, 0, 0]], device=dist_manager.device)
+    lift_direction = torch.tensor([[0, 0, 1]], device=dist_manager.device)
+
+    results = []
+    # Unrounded force coefficients for the R2 scores (`results` only holds
+    # strings formatted for display):
+    force_coeffs = []
+    start = time.time()
+    for batch_idx, batch in enumerate(val_dataset):
+        with torch.no_grad():
+            loss, metrics, (global_predictions, global_targets) = (
+                batched_inference_loop(
+                    batch,
+                    model,
+                    cfg.precision,
+                    cfg.data.mode,
+                    batch_resolution,
+                    output_pad_size,
+                    dist_manager,
+                    val_dataset,
+                )
+            )
+        end = time.time()
+        elapsed = end - start
+        logger.info(f"Finished batch {batch_idx} in {elapsed:.4f} seconds")
+        start = time.time()
+
+        if cfg.data.mode == "surface":
+            coeff = 1.0
+
+            # Compute the drag and lift coefficients:
+            # (Index on [0] is to remove the 1 batch index)
+            pred_pressure, pred_shear = torch.split(
+                global_predictions[0], (1, 3), dim=-1
+            )
+
+            pred_pressure = pred_pressure.reshape(-1)
+            pred_drag_coeff, _, _ = compute_force_coefficients(
+                batch["surface_normals"][0],
+                batch["surface_areas"],
+                coeff,
+                pred_pressure,
+                pred_shear,
+                drag_direction,
+            )
+
+            pred_lift_coeff, _, _ = compute_force_coefficients(
+                batch["surface_normals"][0],
+                batch["surface_areas"],
+                coeff,
+                pred_pressure,
+                pred_shear,
+                lift_direction,
+            )
+
+            true_pressure, true_shear = torch.split(global_targets[0], (1, 3), dim=-1)
+
+            true_pressure = true_pressure.reshape(-1)
+            true_drag_coeff, _, _ = compute_force_coefficients(
+                batch["surface_normals"][0],
+                batch["surface_areas"],
+                coeff,
+                true_pressure,
+                true_shear,
+                drag_direction,
+            )
+
+            true_lift_coeff, _, _ = compute_force_coefficients(
+                batch["surface_normals"][0],
+                batch["surface_areas"],
+                coeff,
+                true_pressure,
+                true_shear,
+                lift_direction,
+            )
+
+            # Convert all four coefficients to Python floats:
+            pred_lift_coeff = pred_lift_coeff.item()
+            pred_drag_coeff = pred_drag_coeff.item()
+            true_lift_coeff = true_lift_coeff.item()
+            true_drag_coeff = true_drag_coeff.item()
+            force_coeffs.append(
+                (pred_drag_coeff, pred_lift_coeff, true_drag_coeff, true_lift_coeff)
+            )
+
+            # Extract metric values and convert tensors to floats
+            l2_pressure, l2_shear_x, l2_shear_y, l2_shear_z = (
+                metrics[key].item() if hasattr(metrics[key], "item") else metrics[key]
+                for key in (
+                    "l2_pressure_surf",
+                    "l2_shear_x",
+                    "l2_shear_y",
+                    "l2_shear_z",
+                )
+            )
+
+            results.append(
+                [
+                    batch_idx,
+                    f"{loss:.4f}",
+                    f"{l2_pressure:.4f}",
+                    f"{l2_shear_x:.4f}",
+                    f"{l2_shear_y:.4f}",
+                    f"{l2_shear_z:.4f}",
+                    f"{pred_drag_coeff:.4f}",
+                    f"{pred_lift_coeff:.4f}",
+                    f"{true_drag_coeff:.4f}",
+                    f"{true_lift_coeff:.4f}",
+                    f"{elapsed:.4f}",
+                ]
+            )
+
+        elif cfg.data.mode == "volume":
+            # Extract metric values and convert tensors to floats
+            l2_pressure, l2_velocity_x, l2_velocity_y, l2_velocity_z, l2_nut = (
+                metrics[key].item() if hasattr(metrics[key], "item") else metrics[key]
+                for key in (
+                    "l2_pressure_vol",
+                    "l2_velocity_x",
+                    "l2_velocity_y",
+                    "l2_velocity_z",
+                    "l2_nut",
+                )
+            )
+
+            results.append(
+                [
+                    batch_idx,
+                    f"{loss:.4f}",
+                    f"{l2_pressure:.4f}",
+                    f"{l2_velocity_x:.4f}",
+                    f"{l2_velocity_y:.4f}",
+                    f"{l2_velocity_z:.4f}",
+                    f"{l2_nut:.4f}",
+                    f"{elapsed:.4f}",
+                ]
+            )
+
+    if cfg.data.mode == "surface":
+        # R2 uses the unrounded floats; `results` only holds display strings.
+        pred_drag_coeffs = [c[0] for c in force_coeffs]
+        pred_lift_coeffs = [c[1] for c in force_coeffs]
+        true_drag_coeffs = [c[2] for c in force_coeffs]
+        true_lift_coeffs = [c[3] for c in force_coeffs]
+
+        # Compute the R2 scores for lift and drag:
+        r2_lift = r2_score(true_lift_coeffs, pred_lift_coeffs)
+        r2_drag = r2_score(true_drag_coeffs, pred_drag_coeffs)
+
+        headers = [
+            "Batch",
+            "Loss",
+            "L2 Pressure",
+            "L2 Shear X",
+            "L2 Shear Y",
+            "L2 Shear Z",
+            "Predicted Drag Coefficient",
+            "Pred Lift Coefficient",
+            "True Drag Coefficient",
+            "True Lift Coefficient",
+            "Elapsed (s)",
+        ]
+        logger.info(
+            f"Results:\n{tabulate(results, headers=headers, tablefmt='github')}"
+        )
+        logger.info(f"R2 score for lift: {r2_lift:.4f}")
+        logger.info(f"R2 score for drag: {r2_drag:.4f}")
+
+    elif cfg.data.mode == "volume":
+        headers = [
+            "Batch",
+            "Loss",
+            "L2 Pressure",
+            "L2 Velocity X",
+            "L2 Velocity Y",
+            "L2 Velocity Z",
+            "L2 Nut",
+            "Elapsed (s)",
+        ]
+        logger.info(
+            f"Results:\n{tabulate(results, headers=headers, tablefmt='github')}"
+        )
+
+    # Calculate means for each metric (skip batch index)
+    if results:
+        # Convert string values back to float for mean calculation
+        arr = np.array(results)[:, 1:].astype(float)
+        means = arr.mean(axis=0)
+        mean_row = ["Mean"] + [f"{m:.4f}" for m in means]
+        logger.info(
+            f"Summary:\n{tabulate([mean_row], headers=headers, tablefmt='github')}"
+        )
+
+
+@hydra.main(version_base=None, config_path="conf", config_name="train_surface")
+def launch(cfg: DictConfig) -> None:
+    """
+    Hydra entry point: receive the composed config and run ``inference``.
+
+    The decorator is given the ``conf`` config path and ``train_surface`` config name.
+
+    Args:
+        cfg (DictConfig): Hydra configuration object.
+    """
+    inference(cfg)
+
+
+# Only start inference when this file is executed as a script.
+if __name__ == "__main__":
+    launch()
diff --git a/examples/cfd/external_aerodynamics/typhon/src/metrics.py b/examples/cfd/external_aerodynamics/typhon/src/metrics.py
new file mode 100644
index 0000000000..34dbb74aff
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/typhon/src/metrics.py
@@ -0,0 +1,164 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+import torch.distributed as dist
+from physicsnemo.distributed import ShardTensor
+from physicsnemo.distributed import DistributedManager
+
+from utils import tensorwise
+
+
+def all_reduce_dict(
+    metrics: dict[str, torch.Tensor], dm: DistributedManager
+) -> dict[str, torch.Tensor]:
+    """
+    Average a dictionary of metrics across all distributed processes.
+
+    Args:
+        metrics: Dictionary of metric names to torch.Tensor values.  All values
+            must share one shape, since they are stacked into a single tensor.
+        dm: DistributedManager instance for distributed context.
+
+    Returns:
+        Dictionary with the same keys holding the mean over ``dm.world_size``
+        ranks.  The input is returned unchanged for one rank or no metrics.
+    """
+    # TODO - update this to use domains and not the full world
+
+    if dm.world_size == 1 or not metrics:
+        return metrics
+
+    # Pack the metrics along a new last axis so one all_reduce covers them all:
+    merged_metrics = torch.stack(list(metrics.values()), dim=-1)
+    dist.all_reduce(merged_metrics)
+    merged_metrics = merged_metrics / dm.world_size
+
+    # Unpack along that same last axis (also correct for non-scalar metrics):
+    return dict(zip(metrics, merged_metrics.unbind(dim=-1)))
+
+
+@tensorwise
+def metrics_fn(
+    pred: torch.Tensor,
+    target: torch.Tensor,
+    dm: DistributedManager,
+    mode: str,
+) -> dict[str, torch.Tensor]:
+    """
+    Evaluates the surface or volume metrics and reduces them across processes.
+
+    Args:
+        pred: Predicted values.
+        target: Target values.
+        dm: DistributedManager instance for distributed context.
+        mode: Either "surface" or "volume".
+
+    Returns:
+        Dictionary of computed metrics, after ``all_reduce_dict``.
+
+    Raises:
+        ValueError: If ``mode`` is neither "surface" nor "volume".
+    """
+    with torch.no_grad():
+        if mode not in ("surface", "volume"):
+            raise ValueError(f"Unknown data mode: {mode}")
+
+        # Pick the per-mode implementation, then reduce its result:
+        compute = metrics_fn_surface if mode == "surface" else metrics_fn_volume
+        metrics = compute(pred, target, dm)
+        return all_reduce_dict(metrics, dm)
+
+
+def metrics_fn_volume(
+    pred: torch.Tensor,
+    target: torch.Tensor,
+    dm: DistributedManager,
+) -> dict[str, torch.Tensor]:
+    """
+    Computes relative L2 volume metrics between prediction and target.
+
+    Per channel, the L2 norm of the error taken over dim 1 is divided by the
+    L2 norm of the target over dim 1, and the ratios are averaged.
+
+    A target norm of zero is not guarded against.
+
+    Args:
+        pred: Predicted values.  Channels are read from dim 2 in the order
+            velocity x, velocity y, velocity z, pressure, nut.
+        target: Target values, laid out like ``pred``.
+        dm: DistributedManager instance; not used by this function.
+
+    Returns:
+        Dictionary of relative L2 errors for pressure, velocity and nut.
+    """
+    # Channel index behind each reported metric:
+    channels = {
+        "l2_pressure_vol": 3,
+        "l2_velocity_x": 0,
+        "l2_velocity_y": 1,
+        "l2_velocity_z": 2,
+        "l2_nut": 4,
+    }
+
+    # Relative L2 error, reducing over dim 1:
+    error_norm = torch.sqrt(torch.sum((pred - target) ** 2, dim=1))
+    target_norm = torch.sqrt(torch.sum(target**2, dim=1))
+    relative_l2 = error_norm / target_norm
+
+    return {name: torch.mean(relative_l2[:, idx]) for name, idx in channels.items()}
+
+
+def metrics_fn_surface(
+    pred: torch.Tensor,
+    target: torch.Tensor,
+    dm: DistributedManager,
+) -> dict[str, torch.Tensor]:
+    """
+    Computes relative L2 surface metrics between prediction and target.
+
+    Per channel, the L2 norm of the error taken over dim 1 is divided by the
+    L2 norm of the target over dim 1, and the ratios are averaged.
+
+    Note:
+        The inputs are compared as given; no un-normalization is applied
+        inside this function.
+
+        A target norm of zero is not guarded against.
+
+    Args:
+        pred: Predicted values.  Channels are read from dim 2 in the order
+            pressure, shear x, shear y, shear z.
+        target: Target values, laid out like ``pred``.
+        dm: DistributedManager instance; not used by this function.
+
+    Returns:
+        Dictionary of L2 surface metrics for pressure and shear components.
+    """
+    # Channel index behind each reported metric:
+    channels = {
+        "l2_pressure_surf": 0,
+        "l2_shear_x": 1,
+        "l2_shear_y": 2,
+        "l2_shear_z": 3,
+    }
+
+    # Relative L2 error, reducing over dim 1:
+    error_norm = torch.sqrt(torch.sum((pred - target) ** 2, dim=1))
+    target_norm = torch.sqrt(torch.sum(target**2, dim=1))
+    relative_l2 = error_norm / target_norm
+
+    return {name: torch.mean(relative_l2[:, idx]) for name, idx in channels.items()}
diff --git a/examples/cfd/external_aerodynamics/typhon/src/preprocess.py b/examples/cfd/external_aerodynamics/typhon/src/preprocess.py
new file mode 100644
index 0000000000..b85ff4dff6
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/typhon/src/preprocess.py
@@ -0,0 +1,121 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+
+from physicsnemo.distributed.shard_tensor import ShardTensor
+from physicsnemo.utils.profiling import profile
+
+
+@profile
+def preprocess_surface_data(
+    batch: dict,
+    norm_factors: dict[str, torch.Tensor],
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
+    """
+    Build the model inputs and normalized targets from a surface batch.
+
+    The functional input stacks the air density and stream velocity.  The
+    embeddings are the surface mesh centers, shifted by the area-weighted
+    mean of the STL centers, concatenated with the surface normals.  The
+    targets are the surface fields standardized with ``norm_factors``.
+
+    Args:
+        batch: Reads "surface_mesh_centers", "surface_normals",
+            "surface_fields", "air_density", "stream_velocity", "stl_areas"
+            and "stl_centers".
+        norm_factors: "mean" and "std" used to standardize the targets.
+
+    Returns:
+        ``(node_features, embeddings, targets, others)``.  ``others`` holds
+        "surface_areas" (the "stl_areas" entry), "surface_normals",
+        "stream_velocity" and "air_density".
+    """
+    # Functional input: air density and stream velocity, as float32.
+    flow_state = [batch["air_density"], batch["stream_velocity"]]
+    node_features = torch.stack(flow_state, dim=-1).to(torch.float32)
+
+    # Normalize the surface fields:
+    targets = (batch["surface_fields"] - norm_factors["mean"]) / norm_factors["std"]
+
+    # Center of mass: STL centers averaged with the STL areas as weights.
+    stl_areas = batch["stl_areas"]
+    weighted_position = torch.einsum("ki,kij->kj", stl_areas, batch["stl_centers"])
+    center_of_mass = weighted_position.unsqueeze(0) / stl_areas.sum()
+
+    # Mesh centers relative to the center of mass:
+    surface_normals = batch["surface_normals"]
+    relative_centers = batch["surface_mesh_centers"] - center_of_mass
+
+    # Fourier features of the mesh centers could be appended here as well.
+    # If you want to use this, be sure to update the functional_dim value
+    # in your configuration:
+    #
+    # fourier_sin_features = [
+    #     torch.sin(mesh_centers * (2 ** i) * torch.pi)
+    #     for i in range(4)
+    # ]
+    # fourier_cos_features = [
+    #     torch.cos(mesh_centers * (2 ** i) * torch.pi)
+    #     for i in range(4)
+    # ]
+
+    embedding_parts = [relative_centers, surface_normals]
+    embeddings = torch.cat(embedding_parts, dim=-1)
+
+    others = dict(
+        surface_areas=stl_areas,
+        surface_normals=surface_normals,
+        stream_velocity=batch["stream_velocity"],
+        air_density=batch["air_density"],
+    )
+
+    return node_features, embeddings, targets, others
+
+
+@profile
+def downsample_surface(
+    features: torch.Tensor,
+    embeddings: torch.Tensor,
+    targets: torch.Tensor,
+    num_keep: int = 1024,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    """
+    Downsample the surface data. We generate one set of indices, and
+    use it to sample the same points from the embeddings and targets,
+    using torch.multinomial to sample without replacement.
+
+    Args:
+        features: Features to broadcast; a new dim 1 is inserted and
+            expanded to the number of returned points (batch size 1).
+        embeddings: Point embeddings, sampled along dim 1.
+        targets: Point targets, sampled along dim 1 with the same indices.
+        num_keep: Number of points to keep; -1 keeps every point.
+
+    Returns:
+        The ``(features, embeddings, targets)`` tuple after downsampling.
+    """
+    if num_keep != -1:
+        # One set of indices, drawn uniformly without replacement:
+        weights = torch.ones(embeddings.shape[1], device=features.device)
+        indices = torch.multinomial(weights, num_keep, replacement=False)
+        embeddings = embeddings[:, indices]
+        targets = targets[:, indices]
+
+    # This unsqueezes the features (air density and stream velocity) to
+    # the same shape as the embeddings
+    features = features.unsqueeze(1).expand(1, embeddings.shape[1], -1)
+    return features, embeddings, targets
diff --git a/examples/cfd/external_aerodynamics/typhon/src/surface_fields_normalization.npz b/examples/cfd/external_aerodynamics/typhon/src/surface_fields_normalization.npz
new file mode 100644
index 0000000000000000000000000000000000000000..228f7550cc4be6993352c81bb56def3b95cd036d
GIT binary patch
literal 1040
zcmWIWW@gc4fB;2?g_0Tf|3d)>g9t-zYGR&VUO^=zg8;(>s45se*)P;LAd-=xjG<aR
zCAB!YNZm?7-6qXMT}MGZEx)LwC^0WSzbGXYB<_}&Q=AGEFV09TNComWOmsAL6lxVH
z09=Roy!SB~b?nlMSz)U8rEW(6iceN^A6N<V2|Lgy#U&|&)gzKGP3_`sH(8yC%CItW
zw6{X_$iMk>cO!cwH*?U&NPn&AAsLs+dnfT%@AXgrv=`MQTbRVJqIe{+Vz9<Yf1|d8
zYF@Z)my?|Bj|q=#1H2iTbeU1J2`C$YFb6bSfT#vW5DCjP=(<2j5~hnCq!0=k7*7FN
w&;*LE2^9U<HBAOu22Bj;nm|E~T~iDbP!lBJ1H4(;KrUbb!mB`9l^MhX05f;!Pyhe`

literal 0
HcmV?d00001

diff --git a/examples/cfd/external_aerodynamics/typhon/src/train.py b/examples/cfd/external_aerodynamics/typhon/src/train.py
new file mode 100644
index 0000000000..5fa1c82775
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/typhon/src/train.py
@@ -0,0 +1,900 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Core python imports:
+import os
+import time
+from pathlib import Path
+from typing import Literal, Any, Callable, Sequence
+import collections
+from contextlib import nullcontext
+
+from collections.abc import Sequence
+
+# Configuration:
+import hydra
+import omegaconf
+from omegaconf import DictConfig
+
+# Pytorch imports:
+import torch
+from torch.optim import Optimizer
+from torch.amp import autocast, GradScaler
+from torch.utils.tensorboard import SummaryWriter
+
+import torch.distributed as dist
+
+# For metrics and model printouts:
+from tabulate import tabulate
+import torchinfo
+
+# For loading dataset stats:
+import numpy as np
+
+# Physicsnemo imports ...
+from physicsnemo.launch.utils import load_checkpoint, save_checkpoint
+from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
+from physicsnemo.distributed import DistributedManager
+from physicsnemo.utils.profiling import profile, Profiler
+from physicsnemo.datapipes.cae.transolver_datapipe import (
+    create_transolver_dataset,
+    TransolverDataPipe,
+)
+
+# Local folder imports for this example
+from metrics import metrics_fn
+from preprocess import (
+    preprocess_surface_data,
+    downsample_surface,
+)
+
+# tensorwise is to handle single-point-cloud or multi-point-cloud running.
+# it's a decorator that will automatically unzip one or more of a list of tensors,
+# run the function, and rezip the results.
+from utils import tensorwise
+
+# Special import, if transformer engine is available:
+from physicsnemo.utils.version_check import check_min_version
+
+TE_AVAILABLE = check_min_version("transformer_engine", "0.0.0", hard_fail=False)
+# Fall back to None placeholders when transformer_engine is not available:
+if TE_AVAILABLE:
+    import transformer_engine.pytorch as te
+    from transformer_engine.common.recipe import Format, DelayedScaling
+else:
+    te, Format, DelayedScaling = None, None, None
+
+# Register extra safe globals for torch.serialization; this will go away when checkpointing is refined further below:
+torch.serialization.add_safe_globals([omegaconf.listconfig.ListConfig])
+torch.serialization.add_safe_globals([omegaconf.base.ContainerMetadata])
+torch.serialization.add_safe_globals([Any])
+torch.serialization.add_safe_globals([list])
+torch.serialization.add_safe_globals([collections.defaultdict])
+torch.serialization.add_safe_globals([dict])
+torch.serialization.add_safe_globals([int])
+torch.serialization.add_safe_globals([omegaconf.nodes.AnyNode])
+torch.serialization.add_safe_globals([omegaconf.base.Metadata])
+
+
+class CombinedOptimizer(Optimizer):
+    """Expose several PyTorch optimizers through one Optimizer-like object.
+
+    The *param_groups* of all wrapped optimizers are merged into one list so
+    that learning-rate schedulers (e.g., ReduceLROnPlateau, CosineAnnealingLR)
+    reach every parameter. Only ``zero_grad``, ``step``, ``state_dict`` and
+    ``load_state_dict`` are overridden here—extend as needed.
+
+    Note:
+        This will get upstreamed to physicsnemo shortly.  Don't count on this
+        class existing here in the future!
+
+        In other words, this is already marked for deprecation!
+    """
+
+    def __init__(
+        self,
+        optimizers: Sequence[Optimizer],
+        torch_compile_kwargs: dict[str, Any] | None = None,
+    ):
+        if not optimizers:
+            raise ValueError("`optimizers` must contain at least one optimizer.")
+
+        self.optimizers = optimizers
+
+        # The wrapped optimizers own the hyper-parameters, so the base class
+        # gets the merged groups together with an empty *defaults* dict.
+        merged_groups = []
+        for inner in optimizers:
+            merged_groups.extend(inner.param_groups)
+        super().__init__(merged_groups, defaults={})
+
+        self.step_fns: list[Callable] = [
+            inner.step
+            if torch_compile_kwargs is None
+            else torch.compile(inner.step, **torch_compile_kwargs)
+            for inner in optimizers
+        ]
+
+    def zero_grad(self, *args, **kwargs) -> None:
+        """Nullify gradients"""
+        for inner in self.optimizers:
+            inner.zero_grad(*args, **kwargs)
+
+    def step(self, closure=None) -> None:
+        """Call each wrapped step function, passing ``closure`` when given."""
+        closure_args = () if closure is None else (closure,)
+        for step_fn in self.step_fns:
+            step_fn(*closure_args)
+
+    def state_dict(self):
+        return {"optimizers": [inner.state_dict() for inner in self.optimizers]}
+
+    def load_state_dict(self, state_dict):
+        """Restore each wrapped optimizer, then re-collect ``param_groups``."""
+        for inner, inner_state in zip(self.optimizers, state_dict["optimizers"]):
+            inner.load_state_dict(inner_state)
+        self.param_groups = [g for inner in self.optimizers for g in inner.param_groups]
+
+
+def get_autocast_context(precision: str) -> nullcontext:
+    """
+    Selects the context manager to run the model under for a given precision.
+
+    Args:
+        precision (str): "float16" or "bfloat16" for CUDA autocast, "float8" for transformer_engine FP8 autocast (only if TE_AVAILABLE); anything else disables autocast.
+
+    Returns:
+        Context manager: The autocast context for the precision, or a nullcontext when none applies.
+    """
+    match precision:
+        case "float16":
+            return autocast("cuda", dtype=torch.float16)
+        case "bfloat16":
+            return autocast("cuda", dtype=torch.bfloat16)
+        case "float8" if TE_AVAILABLE:
+            recipe = DelayedScaling(
+                fp8_format=Format.HYBRID, amax_history_len=16, amax_compute_algo="max"
+            )
+            return te.fp8_autocast(enabled=True, fp8_recipe=recipe)
+        case _:
+            return nullcontext()
+
+
+@tensorwise
+def cast_precisions(tensor: torch.Tensor, precision: str) -> torch.Tensor:
+    """
+    Converts a tensor to the dtype named by ``precision``.
+
+    "float16" and "bfloat16" are cast to the matching torch dtype; every
+    other value returns the tensor as it came in.
+
+    Whether a single tensor or a list of tensors is accepted is decided by
+    the ``tensorwise`` decorator, not by this body.
+    """
+    target_dtype = None
+    if precision == "float16":
+        target_dtype = torch.float16
+    elif precision == "bfloat16":
+        target_dtype = torch.bfloat16
+    # No dtype is associated with any other precision string.
+    if target_dtype is None:
+        return tensor
+    return tensor.to(target_dtype)
+
+
+@tensorwise
+def pad_input_for_fp8(
+    features: torch.Tensor,
+    embeddings: torch.Tensor,
+    geometry: torch.Tensor | None = None,
+) -> tuple[torch.Tensor, torch.Tensor | None]:
+    """
+    Pads the input features tensor so that the concatenated feature and embedding dimension is a multiple of 16,
+    which is required for FP8 operations.  If a geometry tensor is given, its last dimension is padded to a
+    multiple of 16 as well.  The embeddings are never modified.
+
+    Args:
+        features (torch.Tensor): The input features tensor of shape (..., feature_dim).
+        embeddings (torch.Tensor): The embeddings tensor of shape (..., embedding_dim).
+        geometry (torch.Tensor | None): Optional geometry tensor of shape (..., geometry_dim).
+
+    Returns:
+        tuple: ``(features, geometry)``. ``features`` is padded so that (features.shape[-1] + embeddings.shape[-1])
+        is a multiple of 16; ``geometry`` is padded likewise, or None if None was passed.
+    """
+    # `-n % 16` is the number of entries missing up to the next multiple of 16 (0 when aligned).
+    fx_pad = -(features.shape[-1] + embeddings.shape[-1]) % 16
+    if fx_pad:
+        features = torch.nn.functional.pad(features, (0, fx_pad))
+
+    if geometry is not None:
+        geometry_pad = -geometry.shape[-1] % 16
+        if geometry_pad:
+            geometry = torch.nn.functional.pad(geometry, (0, geometry_pad))
+
+    return features, geometry
+
+
+@tensorwise
+def unpad_output_for_fp8(
+    outputs: torch.Tensor, output_pad_size: int | None
+) -> torch.Tensor:
+    """
+    Removes the padding from the output tensor that was added for FP8 compatibility.
+
+    Args:
+        outputs (torch.Tensor): The output tensor of shape (..., output_dim + pad_size) if padded.
+        output_pad_size (int | None): The number of padded elements to remove from the last dimension. If None or 0, no unpadding is performed.
+
+    Returns:
+        torch.Tensor: The unpadded output tensor.
+    """
+    # A pad size of 0 must be skipped too: `[..., :-0]` is `[..., :0]`, i.e. empty.
+    if not output_pad_size:
+        return outputs
+    return outputs[..., :-output_pad_size]
+
+
+@tensorwise
+def loss_fn(outputs: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
+    """Return the mean-squared error between ``outputs`` and ``targets``."""
+    mse = torch.nn.functional.mse_loss
+    error = mse(outputs, targets)
+    return error
+
+
+def forward_pass(
+    batch: dict,
+    model: torch.nn.Module,
+    precision: str,
+    output_pad_size: int | None,
+    dist_manager: DistributedManager,
+    data_mode: Literal["surface", "volume"],
+    datapipe: TransolverDataPipe,
+):
+    """
+    Run the forward pass of the model for one batch, including metrics and loss calculation.
+
+    Transolver takes just one tensor for features, embeddings.
+    Typhon takes a list of tensors, for each.
+
+    Typhon needs a `geometry` tensor, so that's the switch we use to distinguish.
+
+    Args:
+        batch (dict): One batch of data.  Must contain "fx", "embeddings" and "fields";
+            may contain "geometry", "air_density" and "stream_velocity".
+        model (torch.nn.Module): The model to run.
+        precision (str): Precision name.  "float16" / "bfloat16" cast the inputs;
+            "float8" additionally pads them when TransformerEngine is available.
+        output_pad_size (int | None): Padding to strip from the model outputs, if any.
+        dist_manager (DistributedManager): Forwarded to ``metrics_fn``.
+        data_mode (Literal["surface", "volume"]): Not used here; the modes are derived
+            from ``datapipe.config.model_type``.  Kept so existing callers still work.
+        datapipe (TransolverDataPipe): Provides ``config.model_type`` and
+            ``unscale_model_targets``.
+
+    Returns:
+        tuple: ``(full_loss, all_metrics, (unscaled_outputs, unscaled_targets))`` where
+        ``full_loss`` is the loss to back-propagate, ``all_metrics`` maps metric names
+        to values (including one ``loss/<mode>`` entry per mode) and the last element
+        holds predictions and targets after ``datapipe.unscale_model_targets``.
+
+    Raises:
+        ValueError: If ``datapipe.config.model_type`` is not one of "combined",
+            "surface" or "volume".
+    """
+
+    targets = batch["fields"]
+    # The presence of a geometry entry selects the Typhon code path below.
+    is_typhon = "geometry" in batch.keys()
+
+    # Cast precisions:
+    features = cast_precisions(batch["fx"], precision=precision)
+    embeddings = cast_precisions(batch["embeddings"], precision=precision)
+    geometry = (
+        cast_precisions(batch["geometry"], precision=precision) if is_typhon else None
+    )
+
+    # One mode name per point cloud handled by the model:
+    model_type = datapipe.config.model_type
+    if model_type == "combined":
+        # This is hard coded for Typhon.  If you have more point clouds,
+        # your mileage may vary.
+        modes = ["surface", "volume"]
+    elif model_type in ("surface", "volume"):
+        modes = [model_type]
+    else:
+        # Fail here with a clear message rather than with an unbound ``modes`` later.
+        raise ValueError(
+            f"Unsupported model_type {model_type!r}; "
+            "expected 'combined', 'surface' or 'volume'."
+        )
+
+    all_metrics = {}
+    with get_autocast_context(precision):
+        # For fp8, we may have to pad the inputs:
+        if precision == "float8" and TE_AVAILABLE:
+            padded = pad_input_for_fp8(features, embeddings, geometry)
+            if isinstance(padded, list):
+                # ``tensorwise`` mapped over list inputs and returned one
+                # ``(features, geometry)`` pair per element.  Regroup the pairs so
+                # each input keeps the layout (list vs. single tensor) it came in with.
+                features_is_seq = not isinstance(features, torch.Tensor)
+                geometry_is_seq = geometry is not None and not isinstance(
+                    geometry, torch.Tensor
+                )
+                if features_is_seq:
+                    features = [pair[0] for pair in padded]
+                else:
+                    features = padded[0][0]
+                if geometry_is_seq:
+                    geometry = [pair[1] for pair in padded]
+                else:
+                    geometry = padded[0][1]
+            else:
+                features, geometry = padded
+
+        if is_typhon:
+            # This is the Typhon path
+            outputs = model(
+                global_embedding=features, local_embedding=embeddings, geometry=geometry
+            )
+
+            outputs = unpad_output_for_fp8(outputs, output_pad_size)
+            # Loss per point cloud:
+            loss = loss_fn(outputs, targets)
+            # Log them too (detached, so the logged values do not retain the graph):
+            for i, mode in enumerate(modes):
+                all_metrics[f"loss/{mode}"] = loss[i].detach()
+            # Averaging over point cloud inputs, instead of summing.
+            full_loss = torch.mean(torch.stack(loss))
+
+        else:
+            # This is the Transolver path
+            outputs = model(fx=features, embedding=embeddings)
+            outputs = unpad_output_for_fp8(outputs, output_pad_size)
+            full_loss = torch.nn.functional.mse_loss(outputs, targets)
+
+            all_metrics[f"loss/{modes[0]}"] = full_loss.detach()
+
+    air_density = batch["air_density"] if "air_density" in batch.keys() else None
+    stream_velocity = (
+        batch["stream_velocity"] if "stream_velocity" in batch.keys() else None
+    )
+
+    # Map predictions and ground truth back through ``unscale_model_targets``.  The
+    # ``tensorwise`` wrapper pairs each entry of ``modes`` with its point cloud.
+    unscale = tensorwise(datapipe.unscale_model_targets)
+    unscaled_outputs = unscale(
+        outputs,
+        air_density=air_density,
+        stream_velocity=stream_velocity,
+        factor_type=modes,
+    )
+    unscaled_targets = unscale(
+        targets,
+        air_density=air_density,
+        stream_velocity=stream_velocity,
+        factor_type=modes,
+    )
+    metrics = metrics_fn(unscaled_outputs, unscaled_targets, dist_manager, modes)
+
+    # In the combined mode, this is a list of dicts.  Merge them.
+    if isinstance(metrics, list):
+        metrics = {k: v for d in metrics for k, v in d.items()}
+    all_metrics.update(metrics)
+
+    return full_loss, all_metrics, (unscaled_outputs, unscaled_targets)
+
+
+@profile
+def train_epoch(
+    dataloader,
+    epoch_len: int,
+    model: torch.nn.Module,
+    output_pad_size: int | None,
+    optimizer: torch.optim.Optimizer,
+    scheduler: torch.optim.lr_scheduler._LRScheduler,
+    logger: PythonLogger,
+    writer: SummaryWriter,
+    epoch: int,
+    cfg: DictConfig,
+    dist_manager: DistributedManager,
+    scaler: GradScaler | None = None,
+) -> float:
+    """
+    Train the model for one epoch.
+
+    Args:
+        dataloader: Training data loader; also passed to ``forward_pass`` as the datapipe.
+        epoch_len (int): Nominal number of batches per epoch (used for the global step).
+        model (torch.nn.Module): The neural network model to train.
+        output_pad_size (int | None): Optional output padding size for lowest precisions (FP8).
+        optimizer (torch.optim.Optimizer): Optimizer for model parameters.
+        scheduler (torch.optim.lr_scheduler._LRScheduler): Learning rate scheduler.  Stepped
+            per batch here unless it is a ``StepLR``.
+        logger (PythonLogger): Logger for training progress.
+        writer (SummaryWriter): TensorBoard writer for logging metrics (only used on rank 0).
+        epoch (int): Current epoch number.
+        cfg (DictConfig): Hydra configuration object.
+        dist_manager (DistributedManager): Distributed manager from physicsnemo.
+        scaler (GradScaler | None, optional): Gradient scaler for mixed precision training.
+
+    Returns:
+        float: The training loss averaged over the batches processed in this epoch.
+    """
+    model.train()
+    total_loss = 0.0
+    total_metrics = {}
+    num_batches = 0
+
+    precision = getattr(cfg, "precision", "float32")
+    start_time = time.time()
+
+    for i, batch in enumerate(dataloader):
+        loss, metrics, _ = forward_pass(
+            batch,
+            model,
+            precision,
+            output_pad_size,
+            dist_manager,
+            cfg.datapipe.mode,
+            dataloader,
+        )
+
+        optimizer.zero_grad()
+        # The gradient scaler is only used for float16; everything else steps directly.
+        if precision == "float16" and scaler is not None:
+            scaler.scale(loss).backward()
+            scaler.step(optimizer)
+            scaler.update()
+        else:
+            loss.backward()
+            optimizer.step()
+
+        # ``StepLR`` is not stepped per batch; every other scheduler is.
+        if not isinstance(scheduler, torch.optim.lr_scheduler.StepLR):
+            scheduler.step()
+
+        end_time = time.time()
+
+        # Logging
+        this_loss = loss.detach().item()
+        total_loss += this_loss
+        num_batches += 1
+
+        # Accumulate plain Python numbers from the very first batch on, so the
+        # running totals never hold tensors (or the autograd graph behind them).
+        for name, value in metrics.items():
+            total_metrics[name] = total_metrics.get(name, 0.0) + value.item()
+
+        duration = end_time - start_time
+        start_time = end_time
+        images_per_second = 1 / duration
+
+        mem_usage = torch.cuda.memory_reserved() / 1024**3
+
+        logger.info(
+            f"Epoch {epoch} [{i}/{epoch_len}] Loss: {this_loss:.6f} Duration: {duration:.2f}s Mem: {mem_usage:.2f}GB"
+        )
+        if dist_manager.rank == 0:
+            global_step = i + epoch_len * epoch
+            writer.add_scalar(
+                "batch/learning_rate", optimizer.param_groups[0]["lr"], global_step
+            )
+            writer.add_scalar("batch/loss", this_loss, global_step)
+            writer.add_scalar("batch/throughpu_per_gpu", images_per_second, global_step)
+            for metric_name, metric_value in metrics.items():
+                writer.add_scalar(f"batch/{metric_name}", metric_value, global_step)
+
+        if cfg.profile and i >= 10:
+            break  # Stop profiling after 10 batches
+
+    # Average over the batches actually processed: profiling may stop the loop early,
+    # and an empty loader must not divide by zero.
+    denominator = max(num_batches, 1)
+    avg_loss = total_loss / denominator
+    avg_metrics = {k: v / denominator for k, v in total_metrics.items()}
+    if dist_manager.rank == 0:
+        writer.add_scalar("epoch/loss", avg_loss, epoch)
+        for metric_name, metric_value in avg_metrics.items():
+            writer.add_scalar(f"epoch/{metric_name}", metric_value, epoch)
+        # Print average metrics using tabulate
+        metrics_table = tabulate(
+            [[k, v] for k, v in avg_metrics.items()],
+            headers=["Metric", "Average Value"],
+            tablefmt="pretty",
+        )
+        print(f"\nEpoch {epoch} Average Metrics:\n{metrics_table}\n")
+    return avg_loss
+
+
+@profile
+def val_epoch(
+    dataloader,
+    epoch_len: int,
+    model: torch.nn.Module,
+    output_pad_size: int | None,
+    logger: PythonLogger,
+    val_writer: SummaryWriter,
+    epoch: int,
+    cfg: DictConfig,
+    dist_manager: DistributedManager,
+) -> float:
+    """
+    Run validation for one epoch.
+
+    Args:
+        dataloader: Validation data loader; also passed to ``forward_pass`` as the datapipe.
+        epoch_len (int): Nominal number of batches per epoch (used in progress logs).
+        model (torch.nn.Module): The model to evaluate.
+        output_pad_size (int | None): Optional output padding size for lowest precisions (FP8).
+        logger (PythonLogger): Logger for validation progress.
+        val_writer (SummaryWriter): TensorBoard writer for logging validation metrics.
+        epoch (int): Current epoch number.
+        cfg (DictConfig): Hydra configuration object.
+        dist_manager (DistributedManager): Distributed manager instance.
+    Returns:
+        float: The validation loss averaged over the batches processed.
+    """
+    model.eval()  # Set model to evaluation mode
+    total_loss = 0.0
+    total_metrics = {}
+    num_batches = 0
+
+    # ``precision`` lives at the top level of the config (``cfg.precision``), not under ``cfg.training``.
+    precision = getattr(cfg, "precision", "float32")
+    start_time = time.time()
+    with torch.no_grad():  # Disable gradient computation
+        for i, batch in enumerate(dataloader):
+            loss, metrics, _ = forward_pass(
+                batch,
+                model,
+                precision,
+                output_pad_size,
+                dist_manager,
+                cfg.datapipe.mode,
+                dataloader,
+            )
+
+            # Accumulate plain Python numbers from the first batch on.
+            for name, value in metrics.items():
+                total_metrics[name] = total_metrics.get(name, 0.0) + value.item()
+
+            # Logging
+            this_loss = loss.detach().item()
+            total_loss += this_loss
+            num_batches += 1
+
+            end_time = time.time()
+            duration = end_time - start_time
+            start_time = end_time
+
+            logger.info(
+                f"Val [{i}/{epoch_len}] Loss: {this_loss:.6f} Duration: {duration:.2f}s"
+            )
+            # We don't add individual loss measurements to tensorboard in the validation loop.
+
+            if cfg.profile and i >= 10:
+                break  # Stop profiling after 10 batches
+
+    # Average over the batches actually processed (the loop may stop early).
+    denominator = max(num_batches, 1)
+    avg_loss = total_loss / denominator
+    avg_metrics = {k: v / denominator for k, v in total_metrics.items()}
+    if dist_manager.rank == 0:
+        val_writer.add_scalar("epoch/loss", avg_loss, epoch)
+        for metric_name, metric_value in avg_metrics.items():
+            val_writer.add_scalar(f"epoch/{metric_name}", metric_value, epoch)
+        # Print average metrics using tabulate
+        metrics_table = tabulate(
+            [[k, v] for k, v in avg_metrics.items()],
+            headers=["Metric", "Average Value"],
+            tablefmt="pretty",
+        )
+        print(f"\nEpoch {epoch} Validation Average Metrics:\n{metrics_table}\n")
+    return avg_loss
+
+
+def update_model_params_for_fp8(cfg, logger) -> tuple:
+    """
+    Adjusts model configuration parameters to ensure compatibility with FP8 computations.
+
+    transformer_engine needs the input and output widths to be divisible by 16.  When
+    ``cfg.precision`` is "float8", ``cfg.model.out_dim`` is rounded up to a multiple of 16,
+    and ``cfg.model.functional_dim`` is increased until
+    ``functional_dim + embedding_dim`` is a multiple of 16.  ``cfg`` is modified in place.
+    For any other precision nothing is changed.
+
+    Args:
+        cfg: Configuration object with ``precision`` and ``model`` attributes.
+        logger: Logger object for info messages.
+
+    Returns:
+        tuple: ``(cfg, output_pad_size)``, always.  ``output_pad_size`` is the amount of
+               padding added to the output dimension, or None if no padding was added.
+    """
+    output_pad_size = None
+    # Only float8 needs any adjustment.
+    if cfg.precision != "float8":
+        return cfg, output_pad_size
+
+    model_cfg = cfg.model
+
+    # ``-n % 16`` is the distance from ``n`` up to the next multiple of 16 (0 if aligned).
+    out_pad = -model_cfg.out_dim % 16
+    if out_pad:
+        output_pad_size = out_pad
+        model_cfg.out_dim += out_pad
+        logger.info(f"Padding output dimension to {model_cfg.out_dim} for fp8 autocast")
+
+    # The input tensors themselves are padded dynamically in the forward pass; here the
+    # configured feature width is grown to match, so the model is built accordingly.
+    in_pad = -(model_cfg.functional_dim + model_cfg.embedding_dim) % 16
+    if in_pad:
+        model_cfg.functional_dim += in_pad
+        logger.info(
+            f"Padding input dimension to {model_cfg.functional_dim} and {model_cfg.embedding_dim} for fp8 autocast"
+        )
+
+    return cfg, output_pad_size
+
+
+@profile
+def main(cfg: DictConfig):
+    """Main training function
+
+    Sets up distributed training, logging, the model, the datasets, the optimizers and
+    the scheduler, calls ``load_checkpoint`` to obtain the starting epoch and then
+    alternates training and validation epochs, checkpointing on rank 0.
+
+    Args:
+        cfg: Hydra configuration object
+
+    Raises:
+        ImportError: If ``cfg.precision`` is "float8" but TransformerEngine is unavailable.
+    """
+    DistributedManager.initialize()
+
+    # Set up distributed training
+    dist_manager = DistributedManager()
+
+    # Set up logging
+    logger = RankZeroLoggingWrapper(PythonLogger(name="training"), dist_manager)
+
+    # Set checkpoint directory - defaults to output_dir if not specified
+    checkpoint_dir = getattr(cfg, "checkpoint_dir", None)
+    if checkpoint_dir is None:
+        checkpoint_dir = cfg.output_dir
+
+    # Only rank 0 creates directories and writes TensorBoard logs.
+    writer = None
+    val_writer = None
+    if dist_manager.rank == 0:
+        os.makedirs(cfg.output_dir, exist_ok=True)
+        os.makedirs(checkpoint_dir, exist_ok=True)
+        writer = SummaryWriter(log_dir=f"{cfg.output_dir}/{cfg.run_id}/train")
+        val_writer = SummaryWriter(log_dir=f"{cfg.output_dir}/{cfg.run_id}/val")
+
+    logger.info(f"Config:\n{omegaconf.OmegaConf.to_yaml(cfg, resolve=True)}")
+    logger.info(f"Output directory: {cfg.output_dir}/{cfg.run_id}")
+    logger.info(f"Checkpoint directory: {checkpoint_dir}/{cfg.run_id}/checkpoints")
+
+    cfg, output_pad_size = update_model_params_for_fp8(cfg, logger)
+
+    # Set up model
+    # (Using partial convert to get lists, etc., instead of ListConfigs.)
+    model = hydra.utils.instantiate(cfg.model, _convert_="partial")
+    logger.info(f"\n{torchinfo.summary(model, verbose=0)}")
+
+    model.to(dist_manager.device)
+
+    model = torch.nn.parallel.DistributedDataParallel(
+        model,
+        device_ids=[dist_manager.local_rank],
+        output_device=dist_manager.device,
+    )
+
+    num_params = sum(p.numel() for p in model.parameters())
+    logger.info(f"Number of parameters: {num_params}")
+
+    # Load the normalization file from configured directory (defaults to current dir)
+    norm_dir = Path(getattr(cfg.datapipe, "normalization_dir", "."))
+
+    def load_factors(kind: str) -> dict | None:
+        """Load mean/std for ``kind`` fields, or None if this mode does not use them."""
+        if cfg.datapipe.mode not in (kind, "combined"):
+            return None
+        with np.load(str(norm_dir / f"{kind}_fields_normalization.npz")) as norm_data:
+            return {
+                key: torch.from_numpy(norm_data[key]).to(dist_manager.device)
+                for key in ("mean", "std")
+            }
+
+    surface_factors = load_factors("surface")
+    volume_factors = load_factors("volume")
+
+    # Training dataset
+    train_dataloader = create_transolver_dataset(
+        cfg.datapipe,
+        phase="train",
+        surface_factors=surface_factors,
+        volume_factors=volume_factors,
+    )
+
+    # Validation dataset
+    val_dataloader = create_transolver_dataset(
+        cfg.datapipe,
+        phase="val",
+        surface_factors=surface_factors,
+        volume_factors=volume_factors,
+    )
+
+    num_replicas = dist_manager.world_size
+    data_rank = dist_manager.rank
+
+    # Set up distributed samplers
+    train_sampler = torch.utils.data.distributed.DistributedSampler(
+        train_dataloader,
+        num_replicas=num_replicas,
+        rank=data_rank,
+        shuffle=True,
+        drop_last=True,
+    )
+
+    val_sampler = torch.utils.data.distributed.DistributedSampler(
+        val_dataloader,
+        num_replicas=num_replicas,
+        rank=data_rank,
+        shuffle=False,  # No shuffling for validation
+        drop_last=True,
+    )
+
+    # Muon handles the 2-D parameters; the configured optimizer handles the rest.
+    all_params = list(model.parameters())
+    muon_params = [p for p in all_params if p.ndim == 2]
+    other_params = [p for p in all_params if p.ndim != 2]
+
+    # Set up optimizer and scheduler
+    optimizer = hydra.utils.instantiate(cfg.training.optimizer, params=other_params)
+    optimizer = CombinedOptimizer(
+        optimizers=[
+            torch.optim.Muon(
+                muon_params,
+                lr=cfg.training.optimizer.lr,
+                weight_decay=cfg.training.optimizer.weight_decay,
+                adjust_lr_fn="match_rms_adamw",
+            ),
+            optimizer,
+        ],
+    )
+
+    # Set up learning rate scheduler based on config.  Only ``StepLR`` is constructed
+    # here, whatever ``cfg.training.scheduler.name`` says.
+    scheduler_params = dict(cfg.training.scheduler.params)
+    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, **scheduler_params)
+
+    precision = cfg.precision
+    scaler = GradScaler() if precision == "float16" else None
+
+    if precision == "float8" and not TE_AVAILABLE:
+        raise ImportError(
+            "TransformerEngine is not installed.  Please install it to use float8 precision."
+        )
+
+    ckpt_args = {
+        "path": f"{checkpoint_dir}/{cfg.run_id}/checkpoints",
+        "optimizer": optimizer,
+        "scheduler": scheduler,
+        "models": model,
+    }
+
+    loaded_epoch = load_checkpoint(device=dist_manager.device, **ckpt_args)
+
+    if cfg.compile:
+        model = torch.compile(model)
+
+    # Training loop
+    logger.info("Starting training...")
+    for epoch in range(loaded_epoch, cfg.training.num_epochs):
+        # Set the epoch in the samplers
+        train_sampler.set_epoch(epoch)
+        val_sampler.set_epoch(epoch)
+        # Materialize each sampler once and reuse it for indices and epoch length.
+        train_indices = list(train_sampler)
+        val_indices = list(val_sampler)
+        train_dataloader.dataset.set_indices(train_indices)
+        val_dataloader.dataset.set_indices(val_indices)
+
+        start_time = time.time()
+        # Training phase
+        with Profiler():
+            train_loss = train_epoch(
+                train_dataloader,
+                len(train_indices),
+                model,
+                output_pad_size,
+                optimizer,
+                scheduler,
+                logger,
+                writer,
+                epoch,
+                cfg,
+                dist_manager,
+                scaler,
+            )
+            end_time = time.time()
+            train_duration = end_time - start_time
+
+            start_time = time.time()
+            # Validation phase
+            val_loss = val_epoch(
+                val_dataloader,
+                len(val_indices),
+                model,
+                output_pad_size,
+                logger,
+                val_writer,
+                epoch,
+                cfg,
+                dist_manager,
+            )
+            end_time = time.time()
+            val_duration = end_time - start_time
+
+        # Log epoch results
+        logger.info(
+            f"Epoch [{epoch}/{cfg.training.num_epochs}] Train Loss: {train_loss:.6f} [duration: {train_duration:.2f}s] Val Loss: {val_loss:.6f} [duration: {val_duration:.2f}s]"
+        )
+
+        # save checkpoint
+        if epoch % cfg.training.save_interval == 0 and dist_manager.rank == 0:
+            save_checkpoint(**ckpt_args, epoch=epoch + 1)
+
+        # ``train_epoch`` steps every scheduler except ``StepLR`` per batch, so
+        # ``StepLR`` is stepped here, once per epoch.
+        if isinstance(scheduler, torch.optim.lr_scheduler.StepLR):
+            scheduler.step()
+
+    # Close the TensorBoard writers (they only exist on rank 0).
+    for tb_writer in (writer, val_writer):
+        if tb_writer is not None:
+            tb_writer.close()
+
+    logger.info("Training completed!")
+
+
+@hydra.main(version_base=None, config_path="conf", config_name="train_surface")
+def launch(cfg: DictConfig):
+    """Launch training with hydra configuration
+
+    Args:
+        cfg: Hydra configuration object
+    """
+    # If you want to use `line_profiler` or PyTorch's profiler, enable them here.
+    profiler = Profiler()
+    if cfg.profile:
+        profiler.enable("torch")
+        profiler.enable("line_profiler")
+    profiler.initialize()
+    try:
+        main(cfg)
+    finally:  # Finalize the profiler even if training raises.
+        profiler.finalize()
+
+
+if __name__ == "__main__":
+    launch()  # ``cfg`` is supplied by the ``@hydra.main`` decorator on ``launch``.
diff --git a/examples/cfd/external_aerodynamics/typhon/src/utils.py b/examples/cfd/external_aerodynamics/typhon/src/utils.py
new file mode 100644
index 0000000000..a5484e9747
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/typhon/src/utils.py
@@ -0,0 +1,102 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from collections.abc import Iterable, Sequence
+import torch
+import functools
+
+_SEQUENCE_BLOCKLIST = (torch.Tensor, str, bytes)
+
+
+def _is_tensor_sequence(x):
+    return not isinstance(x, _SEQUENCE_BLOCKLIST) and isinstance(x, Sequence)  # never tensor/str/bytes
+
+
+def _coerce_iterable(arg):
+    """
+    Return ``(value, is_sequence)`` for one argument handed to ``tensorwise``.
+
+    Sequences pass through untouched, other iterables (zip objects, generators, ...)
+    are materialized into a tuple, and tensors, str, bytes and non-iterables are scalars.
+    """
+    if isinstance(arg, _SEQUENCE_BLOCKLIST) or not isinstance(arg, Iterable):
+        return arg, False
+    return (arg if isinstance(arg, Sequence) else tuple(arg)), True
+
+
+def tensorwise(fn):
+    """
+    Decorator: let ``fn`` accept either single values or sequences of values.
+
+    When no argument is a sequence, ``fn`` is called once and its result is returned
+    as is.  Otherwise ``fn`` is called once per position: every sequence argument
+    contributes its i-th element while all other arguments are passed unchanged, and
+    the results are returned as a list.  Iterables that are not sequences (zip objects,
+    generators, ...) are materialized first.  Tensors, str and bytes always count as
+    single values.
+
+    Example:
+        ``tensorwise(f)([a, b], scale=2)`` returns ``[f(a, scale=2), f(b, scale=2)]``.
+
+    Args:
+        fn: The callable to wrap.
+
+    Returns:
+        The wrapped callable; it keeps the name and docstring of ``fn``.
+
+    Raises:
+        ValueError: When the wrapper is called with sequence arguments of differing lengths.
+    """
+
+    @functools.wraps(fn)
+    def wrapper(*args, **kwargs):
+        # Normalize every argument into a (value, is_sequence) pair.
+        positional = [_coerce_iterable(a) for a in args]
+        keyword = {name: _coerce_iterable(v) for name, v in kwargs.items()}
+
+        # Collect the lengths of all sequence arguments (positional + keyword).
+        sizes = {len(value) for value, is_seq in positional if is_seq}
+        sizes |= {len(value) for value, is_seq in keyword.values() if is_seq}
+
+        if not sizes:
+            # Nothing is a sequence - plain call with the normalized arguments.
+            return fn(
+                *(value for value, _ in positional),
+                **{name: value for name, (value, _) in keyword.items()},
+            )
+
+        if len(sizes) != 1:
+            raise ValueError(
+                f"Sequence arguments must have same length; got lengths {sizes}."
+            )
+        (count,) = sizes
+
+        def pick(entry, index):
+            """Select element ``index`` of a sequence argument, or the value itself."""
+            value, is_seq = entry
+            return value[index] if is_seq else value
+
+        results = []
+        for index in range(count):
+            call_args = [pick(entry, index) for entry in positional]
+            call_kwargs = {
+                name: pick(entry, index) for name, entry in keyword.items()
+            }
+            results.append(fn(*call_args, **call_kwargs))
+        return results
+
+    return wrapper
diff --git a/examples/cfd/external_aerodynamics/typhon/src/volume_fields_normalization.npz b/examples/cfd/external_aerodynamics/typhon/src/volume_fields_normalization.npz
new file mode 100644
index 0000000000000000000000000000000000000000..c1f0e6f463f1a4efe45af83a2eca8e0ff0c53802
GIT binary patch
literal 1056
zcmWIWW@gc4fB;2?4`=kB|Azt&1`&qb)Wkf!yn;$b1_6dCP*pH`vR|lgKqMnW8AG*t
zN@{U(k-C+Fx=osix{iW+T7FSUQDR<veo;y)NZc(kr#KZTUYwCwkP75$nCfWiDAX!Y
z0J!eQGTN7(nz-xi0vj7cC$oKt3j@spP<&G%-PQ>64Li^`#U&|&)iWmrPT4i?^s-Z`
zkhXgjsACt;HWSS=ryjhtM)pi@=Aez4FWg)9UzQO+uuh8c0NW<3gZ%HbW}wDQ<go=h
zD4t2I7_2eFxnIKJ&JhuZg6se7SD%lxKYmKfD!`kONtYQl+kmnK2y;NQ35aT71d*^z
zgsuyeJYl-nK?<Ruf$<!W1x>8znn1|_yQbMd%b<w@T@xs{v1`g;0&0Q;et<VC8^{GL
MKzJ2Mn=*rV05}NonE(I)

literal 0
HcmV?d00001

diff --git a/physicsnemo/experimental/models/typhon/typhon.py b/physicsnemo/experimental/models/typhon/typhon.py
index 8936bc1a57..3902288aca 100644
--- a/physicsnemo/experimental/models/typhon/typhon.py
+++ b/physicsnemo/experimental/models/typhon/typhon.py
@@ -134,7 +134,7 @@ def compute_slice_attention_cross(
         """
 
         # Project the slice and context tokens:
-        
+
         q_input = torch.cat(slice_tokens, dim=-2)
         q = self.cross_q(q_input)
         
@@ -154,7 +154,7 @@ def compute_slice_attention_cross(
         return cross_attention
 
     def forward(
-        self, x: tuple[torch.Tensor, ...], context: torch.Tensor | None = None
+        self, x: tuple[torch.Tensor, ...], context: tuple[torch.Tensor, ...] | None = None
     ) -> torch.Tensor:
         r"""Forward pass of the GALE module.
 
@@ -195,10 +195,13 @@ def forward(
         
         # HERE, we are differing: apply cross-attention with physical states:
         if context is not None:
-            cross_slice_token = self.compute_slice_attention_cross(
-                slice_tokens, context
-            )
-
+            # cross_slice_token = self.compute_slice_attention_cross(
+            #     slice_tokens, context
+            # )
+            cross_slice_token = [ self.compute_slice_attention_cross([_slice_token], context)[0] 
+                for _slice_token in slice_tokens 
+            ]
+            
             # Apply learnable mixing:
             mixing_weight = torch.sigmoid(self.state_mixing)
             out_slice_token = [ mixing_weight * sst + (1 - mixing_weight) * cst
@@ -316,7 +319,7 @@ def __init__(
                 ),
             )
 
-    def forward(self, fx: torch.Tensor, global_context: torch.Tensor) -> torch.Tensor:
+    def forward(self, fx: tuple[torch.Tensor, ...], global_context: tuple[torch.Tensor, ...]) -> torch.Tensor:
         r"""Forward pass of the GALE block.
 
         Parameters
@@ -577,66 +580,6 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 
         return slice_tokens
 
-class GeoConvOut(nn.Module):
-    """
-    Geometry layer to project STL geometry data onto regular grids.
-    """
-
-    def __init__(
-        self,
-        input_features: int,
-        neighbors_in_radius: int,
-        base_neurons: int,
-    ):
-        """
-        Initialize the GeoConvOut layer.
-
-        Args:
-            input_features: Number of input feature dimensions
-            neighbors_in_radius: Number of neighbors in radius
-        """
-        super().__init__()
-        self.base_neurons = base_neurons
-
-        input_features_calculated = input_features * neighbors_in_radius
-
-        self.mlp = Mlp(
-            in_features=input_features_calculated,
-            hidden_features=[base_neurons, base_neurons // 2],
-            out_features=base_neurons,
-            act_layer=nn.GELU,
-            drop=0.0,
-        )
-
-        self.activation = nn.GELU
-
-        self.neighbors_in_radius = neighbors_in_radius
-
-    def forward(
-        self,
-        x: torch.Tensor,
-    ) -> torch.Tensor:
-        """
-        Process and project geometric features onto a 3D grid.
-
-        Args:
-            x: Input tensor containing coordinates of the neighboring points
-               (batch_size, n_points, n_neighbors, 3)
-            
-        Returns:
-            Processed geometry features of shape (batch_size, n_points, n_neighbors, base_neurons)
-        """
-
-        b, n_points, n_neighbors, c = x.shape
-        x = rearrange(
-            x, "b x y z -> b x (y z)", x=n_points, y=n_neighbors, z=c
-        )
-        
-        x = F.tanh(self.mlp(x))
-
-        return x
-
-
 def _normalize_dim(x):
     # Accept int as scalar
     if isinstance(x, int):
@@ -838,17 +781,25 @@ def __init__(
                         neighbors_in_radius=neighbors_in_radius[h],
                     ))
 
-                    self.geo_conv_in_list.append(GeoConvOut(
-                        input_features=geometry_dim,
-                        neighbors_in_radius=neighbors_in_radius[h],
-                        base_neurons=n_hidden_local,
-                    ))
+                    self.geo_conv_in_list.append(
+                        Mlp(
+                            in_features=geometry_dim * neighbors_in_radius[h],
+                            hidden_features=[n_hidden_local, n_hidden_local // 2],
+                            out_features=n_hidden_local,
+                            act_layer=nn.GELU,
+                            drop=0.0,
+                        )
+                    )
 
-                    self.geo_conv_out_list.append(GeoConvOut(
-                        input_features=geometry_dim,
-                        neighbors_in_radius=neighbors_in_radius[h],
-                        base_neurons=n_hidden_local,
-                    ))
+                    self.geo_conv_out_list.append(
+                        Mlp(
+                            in_features=geometry_dim * neighbors_in_radius[h],
+                            hidden_features=[n_hidden_local, n_hidden_local // 2],
+                            out_features=n_hidden_local,
+                            act_layer=nn.GELU,
+                            drop=0.0,
+                        )
+                    )
                     
                     self.geometry_features_tokenizer_list.append(ContextProjector(
                         n_hidden_local,
@@ -969,6 +920,7 @@ def forward(
 
         """
 
+        local_embedding = _normalize_tensor(local_embedding)
         # First, construct the global context vectors:
         global_context_input = []
         if geometry is not None:
@@ -976,7 +928,11 @@ def forward(
                 for i in range(len(local_embedding)):
                     for h in range(len(self.radii)):
                         mapping, k_short = self.bq_warp[i][h](local_embedding[i][:, :, :3], geometry)
-                        geometry_features = self.geo_conv_in[i][h](k_short)
+                        b, n_points, n_neighbors, c = k_short.shape
+                        k_short_reshaped = rearrange(
+                            k_short, "b x y z -> b x (y z)", x=n_points, y=n_neighbors, z=c
+                        )
+                        geometry_features = F.tanh(self.geo_conv_in[i][h](k_short_reshaped))
                         geometry_states = self.geometry_features_tokenizer[i][h](geometry_features)
                         global_context_input.append(geometry_states)
             geometry_states = self.geometry_tokenizer(geometry)
@@ -996,12 +952,14 @@ def forward(
                 local_embedding_list_radii = []
                 for h in range(len(self.radii)):
                     mapping, k_short = self.bq_warp[i][h](geometry, local_embedding[i][:, :, :3])
-                    local_features = self.geo_conv_out[i][h](k_short)
+                    b, n_points, n_neighbors, c = k_short.shape
+                    k_short_reshaped = rearrange(
+                        k_short, "b x y z -> b x (y z)", x=n_points, y=n_neighbors, z=c
+                    )
+                    local_features = F.tanh(self.geo_conv_out[i][h](k_short_reshaped))
                     local_embedding_list_radii.append(local_features)
                 local_embedding_bq.append(torch.cat(local_embedding_list_radii, dim=-1))
 
-        local_embedding = _normalize_tensor(local_embedding)
-
         # Project the inputs to the hidden dimension:
         x = [ self.preprocess[i](le) for i, le in enumerate(local_embedding) ]
 

From 158376d1b947f48a109d14f45ea256c50cc8041f Mon Sep 17 00:00:00 2001
From: Rishi Ranade <rranade@oci-hsg-cs-001-login-01.cm.cluster>
Date: Sat, 6 Dec 2025 12:36:06 -0800
Subject: [PATCH 10/32] updating transolver recipe configs

---
 .../transolver/src/conf/datapipe/core.yaml    |  2 +-
 .../transolver/src/conf/model/typhon.yaml     |  2 +-
 .../transolver/src/conf/typhon_combined.yaml  | 48 -------------------
 .../transolver/src/conf/typhon_surface.yaml   | 48 -------------------
 .../transolver/src/conf/typhon_volume.yaml    | 48 -------------------
 5 files changed, 2 insertions(+), 146 deletions(-)
 delete mode 100644 examples/cfd/external_aerodynamics/transolver/src/conf/typhon_combined.yaml
 delete mode 100644 examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml
 delete mode 100644 examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml

diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/core.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/core.yaml
index cb972abefe..67adcb45f6 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/core.yaml
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/core.yaml
@@ -30,7 +30,7 @@ preload_depth: 1
 pin_memory: true
 
 # Sampling resolution of the point clouds:
-resolution: 50_000
+resolution: 200_000
 
 # Surface / Volume / (combined, if supported)
 mode: ???
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml
index 38b1fd8feb..2745a00ab6 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml
+++ b/examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml
@@ -25,7 +25,7 @@ dropout: 0.0
 n_head: 8
 act: "gelu"
 mlp_ratio: 2
-slice_num: 256
+slice_num: 128
 use_te: false
 plus: false
 include_local_features: true # use local features
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_combined.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_combined.yaml
deleted file mode 100644
index 8da413783c..0000000000
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_combined.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-defaults:
-  - training: base
-  - model: typhon
-  - datapipe: combined
-
-output_dir: "runs"
-checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
-run_id: "typhon/combined/bfloat16"
-
-model:
-  functional_dim: 
-   - 6 
-   - 7
-  out_dim: 
-   - 4
-   - 5
-
-# Performance considerations:
-precision: float32 # float32, float16, bfloat16, or float8
-compile: false
-profile: false
-
-datapipe:
-  include_geometry: true
-  geometry_sampling: 300_000
-  broadcast_global_features: false
-
-
-# Logging configuration
-logging:
-  level: INFO
-  format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml
deleted file mode 100644
index 50dc1356f2..0000000000
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_surface.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-defaults:
-  - training: base
-  - model: typhon
-  - datapipe: surface
-
-output_dir: "runs"
-checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
-run_id: "typhon/surface/bq"
-
-# Performance considerations:
-precision: float32 # float32, float16, bfloat16, or float8
-compile: true
-profile: false
-
-model:
-  functional_dim: 6
-  include_local_features: true # use local features
-  radii: [0.05, 0.25, 1.0, 2.5] # radius for local features
-  neighbors_in_radius: [16, 64, 128, 256] # neighbors in radius for local features
-  n_hidden_local: 32 # hidden dimension for local features
-
-datapipe:
-  include_sdf: false
-  include_geometry: true
-  geometry_sampling: 300_000
-  broadcast_global_features: false
-
-
-# Logging configuration
-logging:
-  level: INFO
-  format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml
deleted file mode 100644
index 37ade392d1..0000000000
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/typhon_volume.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-defaults:
-  - training: base
-  - model: typhon
-  - datapipe: volume
-
-output_dir: "runs"
-checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
-run_id: "typhon/volume/bq"
-
-# Performance considerations:
-precision: float32 # float32, float16, bfloat16, or float8
-compile: false
-profile: false
-
-datapipe:
-  include_geometry: true
-  geometry_sampling: 300_000
-  broadcast_global_features: false
-
-
-model:
-  functional_dim: 7
-  out_dim: 5
-  include_local_features: true # use local features
-  radii: [0.05, 0.25, 1.0, 2.5] # radius for local features
-  neighbors_in_radius: [16, 64, 128, 256] # neighbors in radius for local features
-  n_hidden_local: 64 # hidden dimension for local features
-
-# Logging configuration
-logging:
-  level: INFO
-  format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

From ae9e5b38dd9a54e27efc5d744493298b70e41df7 Mon Sep 17 00:00:00 2001
From: Rishi Ranade <rranade@oci-hsg-cs-001-login-01.cm.cluster>
Date: Sun, 7 Dec 2025 18:18:36 -0800
Subject: [PATCH 11/32] fixing errors in inference_on_zarr and compute_norms

---
 .../typhon/src/compute_normalizations.py      | 10 ++--
 .../typhon/src/inference_on_zarr.py           | 59 +++++++++++--------
 2 files changed, 40 insertions(+), 29 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/typhon/src/compute_normalizations.py b/examples/cfd/external_aerodynamics/typhon/src/compute_normalizations.py
index 749a7ab2f7..c75fdc9af7 100644
--- a/examples/cfd/external_aerodynamics/typhon/src/compute_normalizations.py
+++ b/examples/cfd/external_aerodynamics/typhon/src/compute_normalizations.py
@@ -113,10 +113,10 @@ def main(cfg: DictConfig) -> None:
     """
 
     # Choose which field to normalize (can be overridden via command line)
-    field_key: str = cfg.data.mode + "_fields"
+    field_key: str = cfg.datapipe.mode + "_fields"
 
     # Normalization directory can be configured (backward compatible: defaults to current directory)
-    normalization_dir: str = getattr(cfg.data, "normalization_dir", ".")
+    normalization_dir: str = getattr(cfg.datapipe, "normalization_dir", ".")
 
     # Construct full path using pathlib (cross-platform, concise)
     workspace_path: str = str(
@@ -127,14 +127,14 @@ def main(cfg: DictConfig) -> None:
 
     # Create the dataset using configuration parameters
     dataset = CAEDataset(
-        data_dir=cfg.data.train.data_path,
+        data_dir=cfg.datapipe.train.data_path,
         keys_to_read=[
             field_key,
         ],
         keys_to_read_if_available={},
         output_device=device,
-        preload_depth=cfg.data.preload_depth,
-        pin_memory=cfg.data.pin_memory,
+        preload_depth=cfg.datapipe.preload_depth,
+        pin_memory=cfg.datapipe.pin_memory,
     )
     # Compute normalization statistics
     mean, std, min_val, max_val = compute_mean_std_min_max(dataset, field_key, 100)
diff --git a/examples/cfd/external_aerodynamics/typhon/src/inference_on_zarr.py b/examples/cfd/external_aerodynamics/typhon/src/inference_on_zarr.py
index 64f653352f..8b1f1f5e3d 100644
--- a/examples/cfd/external_aerodynamics/typhon/src/inference_on_zarr.py
+++ b/examples/cfd/external_aerodynamics/typhon/src/inference_on_zarr.py
@@ -199,8 +199,9 @@ def batched_inference_loop(
     metrics = {k: v / global_weight for k, v in metrics.items()}
     loss = loss / global_weight
 
-    global_predictions = torch.cat([l[0] for l in global_preds_targets], dim=1)
-    global_targets = torch.cat([l[1] for l in global_preds_targets], dim=1)
+    # import pdb; pdb.set_trace()
+    global_predictions = torch.cat([l[0][0] for l in global_preds_targets], dim=1)
+    global_targets = torch.cat([l[1][0] for l in global_preds_targets], dim=1)
 
     # Now, we have to *unshuffle* the prediction to the original index
     inverse_indices = torch.empty_like(indices)
@@ -253,17 +254,26 @@ def inference(cfg: DictConfig) -> None:
     logger.info(f"Number of parameters: {num_params}")
 
     # Load the normalization file from configured directory (defaults to current dir)
-    norm_dir = getattr(cfg.data, "normalization_dir", ".")
-    if cfg.data.mode == "surface":
+    norm_dir = getattr(cfg.datapipe, "normalization_dir", ".")
+    if cfg.datapipe.mode == "surface" or cfg.datapipe.mode == "combined":
         norm_file = str(Path(norm_dir) / "surface_fields_normalization.npz")
-    elif cfg.data.mode == "volume":
-        norm_file = str(Path(norm_dir) / "volume_fields_normalization.npz")
+        norm_data = np.load(norm_file)
+        surface_factors = {
+            "mean": torch.from_numpy(norm_data["mean"]).to(dist_manager.device),
+            "std": torch.from_numpy(norm_data["std"]).to(dist_manager.device),
+        }
+    else:
+        surface_factors = None
 
-    norm_data = np.load(norm_file)
-    norm_factors = {
-        "mean": torch.from_numpy(norm_data["mean"]).to(dist_manager.device),
-        "std": torch.from_numpy(norm_data["std"]).to(dist_manager.device),
-    }
+    if cfg.datapipe.mode == "volume" or cfg.datapipe.mode == "combined":
+        norm_file = str(Path(norm_dir) / "volume_fields_normalization.npz")
+        norm_data = np.load(norm_file)
+        volume_factors = {
+            "mean": torch.from_numpy(norm_data["mean"]).to(dist_manager.device),
+            "std": torch.from_numpy(norm_data["std"]).to(dist_manager.device),
+        }
+    else:
+        volume_factors = None
 
     if cfg.compile:
         model = torch.compile(model, dynamic=True)
@@ -273,21 +283,22 @@ def inference(cfg: DictConfig) -> None:
     # so there is not downsampling.  We still batch it in the inference script
     # for memory usage constraints.
 
-    batch_resolution = cfg.data.resolution
-    cfg.data.resolution = None
+    batch_resolution = cfg.datapipe.resolution
+    cfg.datapipe.resolution = None
     ## Make sure to read the whole data sample for volume:
-    if cfg.data.mode == "volume":
-        cfg.data.volume_sample_from_disk = False
+    if cfg.datapipe.mode == "volume":
+        cfg.datapipe.volume_sample_from_disk = False
 
     # And we need the mesh features for drag, lift in surface data:
-    if cfg.data.mode == "surface":
-        cfg.data.return_mesh_features = True
+    if cfg.datapipe.mode == "surface":
+        cfg.datapipe.return_mesh_features = True
 
     # Validation dataset
     val_dataset = create_transolver_dataset(
-        cfg.data,
+        cfg.datapipe,
         phase="val",
-        scaling_factors=norm_factors,
+        surface_factors=surface_factors,
+        volume_factors=volume_factors,
     )
 
     results = []
@@ -299,7 +310,7 @@ def inference(cfg: DictConfig) -> None:
                     batch,
                     model,
                     cfg.precision,
-                    cfg.data.mode,
+                    cfg.datapipe.mode,
                     batch_resolution,
                     output_pad_size,
                     dist_manager,
@@ -311,7 +322,7 @@ def inference(cfg: DictConfig) -> None:
         logger.info(f"Finished batch {batch_idx} in {elapsed:.4f} seconds")
         start = time.time()
 
-        if cfg.data.mode == "surface":
+        if cfg.datapipe.mode == "surface":
             coeff = 1.0
 
             # Compute the drag and loss coefficients:
@@ -404,7 +415,7 @@ def inference(cfg: DictConfig) -> None:
                 ]
             )
 
-        elif cfg.data.mode == "volume":
+        elif cfg.datapipe.mode == "volume":
             # Extract metric values and convert tensors to floats
             l2_pressure = (
                 metrics["l2_pressure_vol"].item()
@@ -445,7 +456,7 @@ def inference(cfg: DictConfig) -> None:
                 ]
             )
 
-    if cfg.data.mode == "surface":
+    if cfg.datapipe.mode == "surface":
         pred_drag_coeffs = [r[6] for r in results]
         pred_lift_coeffs = [r[7] for r in results]
         true_drag_coeffs = [r[8] for r in results]
@@ -474,7 +485,7 @@ def inference(cfg: DictConfig) -> None:
         logger.info(f"R2 score for lift: {r2_lift:.4f}")
         logger.info(f"R2 score for drag: {r2_drag:.4f}")
 
-    elif cfg.data.mode == "volume":
+    elif cfg.datapipe.mode == "volume":
         headers = [
             "Batch",
             "Loss",

From c7b92a5b329249d46a2bf619134059364042b717 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 8 Dec 2025 09:31:57 -0800
Subject: [PATCH 12/32] Starting to add tests to Typhon with BQ.  Not yet fully
 functional.

---
 .../experimental/models/typhon/typhon.py      |  20 +
 test/models/typhon/__init__.py                |  16 +
 test/models/typhon/test_context_projector.py  |  87 ++
 test/models/typhon/test_gale.py               | 280 +++++++
 test/models/typhon/test_typhon.py             | 788 ++++++++++++++++++
 5 files changed, 1191 insertions(+)
 create mode 100644 test/models/typhon/__init__.py
 create mode 100644 test/models/typhon/test_context_projector.py
 create mode 100644 test/models/typhon/test_gale.py
 create mode 100644 test/models/typhon/test_typhon.py

diff --git a/physicsnemo/experimental/models/typhon/typhon.py b/physicsnemo/experimental/models/typhon/typhon.py
index 3902288aca..851fc42b8f 100644
--- a/physicsnemo/experimental/models/typhon/typhon.py
+++ b/physicsnemo/experimental/models/typhon/typhon.py
@@ -917,9 +917,20 @@ def forward(
         projecting them onto physical state spaces. These context embeddings are then used
         in all GALE blocks via cross-attention, allowing geometric and global information to
         guide the learned physical state dynamics.
+        
 
         """
 
+        single_input = isinstance(local_embedding, torch.Tensor)
+
+        print(f"type of local_embedding: {type(local_embedding)}")
+        print(f"type of global_embedding: {type(global_embedding)}")
+        print(f"type of geometry: {type(geometry)}")
+
+        if time is not None:
+            raise NotImplementedError("Time input is not implemented yet."
+                                      "Error rather than silently ignoring it.")
+
         local_embedding = _normalize_tensor(local_embedding)
         # First, construct the global context vectors:
         global_context_input = []
@@ -945,6 +956,9 @@ def forward(
         # Construct the embedding states:
         if len(global_context_input) > 0:
             embedding_states = torch.cat(global_context_input, dim=-1)
+        else:
+            embedding_states = None
+
 
         if self.include_local_features and geometry is not None:
             local_embedding_bq = []
@@ -972,4 +986,10 @@ def forward(
         # Now, pass the data through the model:
         x = [self.ln_mlp_out[i](x[i]) for i in range(len(x))]
 
+        if single_input:
+            # If only one input came in, use just that as output:
+            x = x[0]
+        else:
+            x = tuple(x)
+
         return x
diff --git a/test/models/typhon/__init__.py b/test/models/typhon/__init__.py
new file mode 100644
index 0000000000..69e0c20f24
--- /dev/null
+++ b/test/models/typhon/__init__.py
@@ -0,0 +1,16 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
diff --git a/test/models/typhon/test_context_projector.py b/test/models/typhon/test_context_projector.py
new file mode 100644
index 0000000000..83029ee7bb
--- /dev/null
+++ b/test/models/typhon/test_context_projector.py
@@ -0,0 +1,87 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+import torch
+
+from physicsnemo.experimental.models.typhon.typhon import (
+    ContextProjector,
+)
+
+# =============================================================================
+# ContextProjector Tests
+# =============================================================================
+
+
+@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
+def test_context_projector_forward(device):
+    """Test ContextProjector forward pass."""
+    torch.manual_seed(42)
+
+    dim = 64
+    heads = 4
+    dim_head = 16
+    slice_num = 8
+    batch_size = 2
+    n_tokens = 100
+
+    projector = ContextProjector(
+        dim=dim,
+        heads=heads,
+        dim_head=dim_head,
+        dropout=0.0,
+        slice_num=slice_num,
+        use_te=False,
+        plus=False,
+    ).to(device)
+
+    x = torch.randn(batch_size, n_tokens, dim).to(device)
+
+    slice_tokens = projector(x)
+
+    # Output shape: [Batch, Heads, Slice_num, dim_head]
+    assert slice_tokens.shape == (batch_size, heads, slice_num, dim_head)
+    assert not torch.isnan(slice_tokens).any()
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+def test_context_projector_plus_mode(device):
+    """Test ContextProjector with Transolver++ mode."""
+    torch.manual_seed(42)
+
+    dim = 64
+    heads = 4
+    dim_head = 16
+    slice_num = 8
+    batch_size = 2
+    n_tokens = 100
+
+    projector = ContextProjector(
+        dim=dim,
+        heads=heads,
+        dim_head=dim_head,
+        dropout=0.0,
+        slice_num=slice_num,
+        use_te=False,
+        plus=True,
+    ).to(device)
+
+    x = torch.randn(batch_size, n_tokens, dim).to(device)
+
+    slice_tokens = projector(x)
+
+    assert slice_tokens.shape == (batch_size, heads, slice_num, dim_head)
+    assert not torch.isnan(slice_tokens).any()
diff --git a/test/models/typhon/test_gale.py b/test/models/typhon/test_gale.py
new file mode 100644
index 0000000000..4ecc2ff587
--- /dev/null
+++ b/test/models/typhon/test_gale.py
@@ -0,0 +1,280 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+import torch
+
+from physicsnemo.experimental.models.typhon.typhon import (
+    GALE,
+    GALE_block,
+)
+
+# =============================================================================
+# GALE (Geometry-Aware Latent Embeddings) Attention Tests
+# =============================================================================
+
+
+@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
+def test_gale_forward_basic(device):
+    """Test GALE attention layer forward pass without context."""
+    torch.manual_seed(42)
+
+    dim = 64
+    heads = 4
+    dim_head = 16
+    slice_num = 8
+    batch_size = 2
+    n_tokens = 100
+
+    gale = GALE(
+        dim=dim,
+        heads=heads,
+        dim_head=dim_head,
+        dropout=0.0,
+        slice_num=slice_num,
+        use_te=False,
+        plus=False,
+        context_dim=dim_head,  # Must match dim_head for cross attention
+    ).to(device)
+
+    # Single input tensor wrapped in tuple
+    x = torch.randn(batch_size, n_tokens, dim).to(device)
+
+    outputs = gale((x,), context=None)
+
+    assert len(outputs) == 1
+    assert outputs[0].shape == (batch_size, n_tokens, dim)
+    assert not torch.isnan(outputs[0]).any()
+
+
+@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
+def test_gale_forward_with_context(device):
+    """Test GALE attention layer forward pass with cross-attention context."""
+    torch.manual_seed(42)
+
+    dim = 64
+    heads = 4
+    dim_head = 16
+    slice_num = 8
+    batch_size = 2
+    n_tokens = 100
+    context_tokens = 32
+    context_dim = dim_head
+
+    gale = GALE(
+        dim=dim,
+        heads=heads,
+        dim_head=dim_head,
+        dropout=0.0,
+        slice_num=slice_num,
+        use_te=False,
+        plus=False,
+        context_dim=context_dim,
+    ).to(device)
+
+    x = torch.randn(batch_size, n_tokens, dim).to(device)
+    context = torch.randn(batch_size, heads, context_tokens, context_dim).to(device)
+
+    outputs = gale((x,), context=context)
+
+    assert len(outputs) == 1
+    assert outputs[0].shape == (batch_size, n_tokens, dim)
+    assert not torch.isnan(outputs[0]).any()
+
+
+@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
+def test_gale_forward_multiple_inputs(device):
+    """Test GALE attention layer with multiple input tensors."""
+    torch.manual_seed(42)
+
+    dim = 64
+    heads = 4
+    dim_head = 16
+    slice_num = 8
+    batch_size = 2
+    n_tokens_1 = 100
+    n_tokens_2 = 150
+    context_dim = dim_head
+
+    gale = GALE(
+        dim=dim,
+        heads=heads,
+        dim_head=dim_head,
+        dropout=0.0,
+        slice_num=slice_num,
+        use_te=False,
+        plus=False,
+        context_dim=context_dim,
+    ).to(device)
+
+    x1 = torch.randn(batch_size, n_tokens_1, dim).to(device)
+    x2 = torch.randn(batch_size, n_tokens_2, dim).to(device)
+
+    outputs = gale((x1, x2), context=None)
+
+    assert len(outputs) == 2
+    assert outputs[0].shape == (batch_size, n_tokens_1, dim)
+    assert outputs[1].shape == (batch_size, n_tokens_2, dim)
+    assert not torch.isnan(outputs[0]).any()
+    assert not torch.isnan(outputs[1]).any()
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+def test_gale_plus_mode(device):
+    """Test GALE attention with Transolver++ mode."""
+    torch.manual_seed(42)
+
+    dim = 64
+    heads = 4
+    dim_head = 16
+    slice_num = 8
+    batch_size = 2
+    n_tokens = 100
+
+    gale = GALE(
+        dim=dim,
+        heads=heads,
+        dim_head=dim_head,
+        dropout=0.0,
+        slice_num=slice_num,
+        use_te=False,
+        plus=True,  # Enable Transolver++ features
+        context_dim=dim_head,
+    ).to(device)
+
+    x = torch.randn(batch_size, n_tokens, dim).to(device)
+
+    outputs = gale((x,), context=None)
+
+    assert len(outputs) == 1
+    assert outputs[0].shape == (batch_size, n_tokens, dim)
+    assert not torch.isnan(outputs[0]).any()
+
+
+# =============================================================================
+# GALE_block Tests
+# =============================================================================
+
+
+@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
+def test_gale_block_forward(device):
+    """Test GALE_block transformer block forward pass."""
+    torch.manual_seed(42)
+
+    hidden_dim = 64
+    n_head = 4
+    batch_size = 2
+    n_tokens = 100
+    slice_num = 8
+    context_dim = hidden_dim // n_head
+
+    block = GALE_block(
+        num_heads=n_head,
+        hidden_dim=hidden_dim,
+        dropout=0.0,
+        act="gelu",
+        mlp_ratio=4,
+        last_layer=False,
+        out_dim=1,
+        slice_num=slice_num,
+        use_te=False,
+        plus=False,
+        context_dim=context_dim,
+    ).to(device)
+
+    x = torch.randn(batch_size, n_tokens, hidden_dim).to(device)
+    context = torch.randn(batch_size, n_head, slice_num, context_dim).to(device)
+
+    outputs = block((x,), global_context=context)
+
+    assert len(outputs) == 1
+    assert outputs[0].shape == (batch_size, n_tokens, hidden_dim)
+    assert not torch.isnan(outputs[0]).any()
+
+
+@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
+def test_gale_block_multiple_inputs(device):
+    """Test GALE_block with multiple input tensors."""
+    torch.manual_seed(42)
+
+    hidden_dim = 64
+    n_head = 4
+    batch_size = 2
+    n_tokens_1 = 100
+    n_tokens_2 = 150
+    slice_num = 8
+    context_dim = hidden_dim // n_head
+
+    block = GALE_block(
+        num_heads=n_head,
+        hidden_dim=hidden_dim,
+        dropout=0.0,
+        act="gelu",
+        mlp_ratio=4,
+        last_layer=False,
+        out_dim=1,
+        slice_num=slice_num,
+        use_te=False,
+        plus=False,
+        context_dim=context_dim,
+    ).to(device)
+
+    x1 = torch.randn(batch_size, n_tokens_1, hidden_dim).to(device)
+    x2 = torch.randn(batch_size, n_tokens_2, hidden_dim).to(device)
+    context = torch.randn(batch_size, n_head, slice_num, context_dim).to(device)
+
+    outputs = block((x1, x2), global_context=context)
+
+    assert len(outputs) == 2
+    assert outputs[0].shape == (batch_size, n_tokens_1, hidden_dim)
+    assert outputs[1].shape == (batch_size, n_tokens_2, hidden_dim)
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+def test_gale_mixing_weight_gradient(device):
+    """Test that GALE mixing weight receives gradients."""
+    torch.manual_seed(42)
+
+    dim = 64
+    heads = 4
+    dim_head = 16
+    slice_num = 8
+    batch_size = 2
+    n_tokens = 100
+    context_tokens = 32
+    context_dim = dim_head
+
+    gale = GALE(
+        dim=dim,
+        heads=heads,
+        dim_head=dim_head,
+        dropout=0.0,
+        slice_num=slice_num,
+        use_te=False,
+        plus=False,
+        context_dim=context_dim,
+    ).to(device)
+
+    x = torch.randn(batch_size, n_tokens, dim, requires_grad=True).to(device)
+    context = torch.randn(batch_size, heads, context_tokens, context_dim).to(device)
+
+    outputs = gale((x,), context=context)
+    loss = outputs[0].sum()
+    loss.backward()
+
+    # Check that state_mixing parameter receives gradient
+    assert gale.state_mixing.grad is not None
+    assert gale.state_mixing.grad != 0
diff --git a/test/models/typhon/test_typhon.py b/test/models/typhon/test_typhon.py
new file mode 100644
index 0000000000..39b150f8a6
--- /dev/null
+++ b/test/models/typhon/test_typhon.py
@@ -0,0 +1,788 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import random
+import sys
+
+import pytest
+import torch
+from pytest_utils import import_or_fail
+
+from physicsnemo.experimental.models.typhon.typhon import (
+    Typhon,
+)
+
+# Add parent directory to path for imports
+script_path = os.path.abspath(__file__)
+sys.path.append(os.path.join(os.path.dirname(script_path), ".."))
+
+from common import (  # noqa E402
+    validate_amp,
+    validate_checkpoint,
+    validate_combo_optims,
+    validate_cuda_graphs,
+    validate_forward_accuracy,
+    validate_jit,
+)
+
+
+# =============================================================================
+# Typhon End-to-End Model Tests
+# =============================================================================
+
+
+@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
+@pytest.mark.parametrize("use_geometry", [False, True])
+@pytest.mark.parametrize("use_global", [False, True])
+def test_typhon_forward(device, use_geometry, use_global):
+    """Test Typhon model forward pass with optional geometry and global context."""
+    torch.manual_seed(42)
+
+    batch_size = 2
+    n_tokens = 100
+    n_geom_tokens = 345
+    n_global = 5
+    geometry_dim = 3
+    global_dim = 16
+
+    model = Typhon(
+        functional_dim=32,
+        out_dim=4,
+        geometry_dim=geometry_dim if use_geometry else None,
+        global_dim=global_dim if use_global else None,
+        n_layers=2,
+        n_hidden=64,
+        dropout=0.0,
+        n_head=4,
+        act="gelu",
+        mlp_ratio=2,
+        slice_num=8,
+        use_te=False,
+        time_input=False,
+        plus=False,
+        include_local_features=False,
+    ).to(device)
+
+    local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
+
+    kwargs = {}
+    if use_geometry:
+        kwargs["geometry"] = torch.randn(batch_size, n_geom_tokens, geometry_dim).to(
+            device
+        )
+    if use_global:
+        kwargs["global_embedding"] = torch.randn(batch_size, n_global, global_dim).to(
+            device
+        )
+
+    outputs = model(local_emb, **kwargs)
+
+    assert isinstance(outputs, torch.Tensor)
+    assert outputs.shape == (batch_size, n_tokens, 4)
+    assert not torch.isnan(outputs).any()
+
+
+@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
+def test_typhon_forward_tuple_inputs(device):
+    """Test Typhon model forward pass with tuple inputs/outputs (multi-head)."""
+    torch.manual_seed(42)
+
+    functional_dims = (32, 48)
+    out_dims = (4, 6)
+
+    model = Typhon(
+        functional_dim=functional_dims,
+        out_dim=out_dims,
+        geometry_dim=3,
+        global_dim=16,
+        n_layers=2,
+        n_hidden=64,
+        dropout=0.0,
+        n_head=4,
+        act="gelu",
+        mlp_ratio=2,
+        slice_num=8,
+        use_te=False,
+        time_input=False,
+        plus=False,
+        include_local_features=False,
+    ).to(device)
+
+    batch_size = 2
+    n_tokens_1 = 100
+    n_tokens_2 = 150
+    n_geom = 235
+    n_global = 5
+
+    local_emb_1 = torch.randn(batch_size, n_tokens_1, functional_dims[0]).to(device)
+    local_emb_2 = torch.randn(batch_size, n_tokens_2, functional_dims[1]).to(device)
+    geometry = torch.randn(batch_size, n_geom, 3).to(device)
+    global_emb = torch.randn(batch_size, n_global, 16).to(device)
+
+    outputs = model(
+        (local_emb_1, local_emb_2),
+        global_embedding=global_emb,
+        geometry=geometry,
+    )
+
+    assert len(outputs) == 2
+    assert all(isinstance(output, torch.Tensor) for output in outputs)
+    assert outputs[0].shape == (batch_size, n_tokens_1, out_dims[0])
+    assert outputs[1].shape == (batch_size, n_tokens_2, out_dims[1])
+    assert not torch.isnan(outputs[0]).any()
+    assert not torch.isnan(outputs[1]).any()
+
+
+@import_or_fail("warp")
+@pytest.mark.parametrize("device", ["cuda:0"])
+def test_typhon_forward_with_local_features(device, pytestconfig):
+    """Test Typhon model forward pass with local features (BQ warp)."""
+    torch.manual_seed(42)
+
+    model = Typhon(
+        functional_dim=32,
+        out_dim=4,
+        geometry_dim=3,
+        global_dim=16,
+        n_layers=2,
+        n_hidden=64,
+        dropout=0.0,
+        n_head=4,
+        act="gelu",
+        mlp_ratio=2,
+        slice_num=8,
+        use_te=False,
+        time_input=False,
+        plus=False,
+        include_local_features=True,
+        radii=[0.05, 0.25],
+        neighbors_in_radius=[8, 32],
+        n_hidden_local=32,
+    ).to(device)
+
+    batch_size = 2
+    n_tokens = 100
+    n_global = 5
+    n_geom = 235
+
+    # For local features, the first 3 channels of local_emb should be coordinates
+    local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
+    geometry = torch.randn(batch_size, n_geom, 3).to(device)
+    global_emb = torch.randn(batch_size, n_global, 16).to(device)
+
+    outputs = model(local_emb, global_embedding=global_emb, geometry=geometry)
+
+    assert isinstance(outputs, torch.Tensor)
+    assert outputs.shape == (batch_size, n_tokens, 4)  # whole batch, not outputs[0]
+    assert not torch.isnan(outputs).any()
+
+
+# =============================================================================
+# Forward Accuracy Tests (reproducibility)
+# =============================================================================
+
+
+@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
+def test_typhon_forward_accuracy_basic(device):
+    """Test Typhon basic forward pass accuracy."""
+    torch.manual_seed(42)
+
+    model = Typhon(
+        functional_dim=32,
+        out_dim=4,
+        geometry_dim=3,
+        global_dim=16,
+        n_layers=2,
+        n_hidden=64,
+        dropout=0.0,
+        n_head=4,
+        act="gelu",
+        mlp_ratio=2,
+        slice_num=8,
+        use_te=False,
+        time_input=False,
+        plus=False,
+        include_local_features=False,
+    ).to(device)
+
+    batch_size = 2
+    n_tokens = 100
+    n_geom = 235
+    n_global = 5
+
+    local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
+    geometry = torch.randn(batch_size, n_geom, 3).to(device)
+    global_emb = torch.randn(batch_size, n_global, 16).to(device)
+
+    assert validate_forward_accuracy(
+        model,
+        (local_emb, global_emb, geometry),
+        file_name="typhon_basic_output.pth",
+        atol=1e-3,
+    )
+
+
+@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
+def test_typhon_forward_accuracy_tuple(device):
+    """Test Typhon forward pass accuracy with tuple inputs."""
+    torch.manual_seed(42)
+
+    functional_dims = (32, 48)
+    out_dims = (4, 6)
+
+    model = Typhon(
+        functional_dim=functional_dims,
+        out_dim=out_dims,
+        geometry_dim=3,
+        global_dim=16,
+        n_layers=2,
+        n_hidden=64,
+        dropout=0.0,
+        n_head=4,
+        act="gelu",
+        mlp_ratio=2,
+        slice_num=8,
+        use_te=False,
+        time_input=False,
+        plus=False,
+        include_local_features=False,
+    ).to(device)
+
+    batch_size = 2
+    n_tokens_1 = 100
+    n_tokens_2 = 150
+    n_global = 5
+    n_geom = 235
+
+    local_emb_1 = torch.randn(batch_size, n_tokens_1, functional_dims[0]).to(device)
+    local_emb_2 = torch.randn(batch_size, n_tokens_2, functional_dims[1]).to(device)
+    geometry = torch.randn(batch_size, n_geom, 3).to(device)
+    global_emb = torch.randn(batch_size, n_global, 16).to(device)
+
+    assert validate_forward_accuracy(
+        model,
+        ((local_emb_1, local_emb_2), global_emb, geometry),
+        file_name="typhon_tuple_output.pth",
+        atol=1e-3,
+    )
+
+
+# =============================================================================
+# Optimization Tests
+# =============================================================================
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+def test_typhon_optimizations(device):
+    """Test Typhon optimizations (CUDA graphs, JIT, AMP, combo)."""
+
+    def setup_model():
+        """Setup fresh Typhon model and inputs for each optimization test."""
+        model = Typhon(
+            functional_dim=32,
+            out_dim=4,
+            geometry_dim=3,
+            global_dim=16,
+            n_layers=2,
+            n_hidden=64,
+            dropout=0.0,
+            n_head=4,
+            act="gelu",
+            mlp_ratio=2,
+            slice_num=8,
+            use_te=False,
+            time_input=False,
+            plus=False,
+            include_local_features=False,
+        ).to(device)
+
+        batch_size = 2
+        n_tokens = 100
+        n_global = 5
+
+        local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
+        geometry = torch.randn(batch_size, n_tokens, 3).to(device)
+        global_emb = torch.randn(batch_size, n_global, 16).to(device)
+
+        return model, local_emb, global_emb, geometry
+
+    # Check CUDA graphs
+    model, local_emb, global_emb, geometry = setup_model()
+    assert validate_cuda_graphs(
+        model,
+        (local_emb, global_emb, geometry),
+    )
+
+    # Check JIT
+    model, local_emb, global_emb, geometry = setup_model()
+    assert validate_jit(
+        model,
+        (local_emb, global_emb, geometry),
+    )
+
+    # Check AMP
+    model, local_emb, global_emb, geometry = setup_model()
+    assert validate_amp(
+        model,
+        (local_emb, global_emb, geometry),
+    )
+
+    # Check Combo
+    model, local_emb, global_emb, geometry = setup_model()
+    assert validate_combo_optims(
+        model,
+        (local_emb, global_emb, geometry),
+    )
+
+
+# =============================================================================
+# Transformer Engine Tests
+# =============================================================================
+
+
+@import_or_fail("transformer_engine")
+@pytest.mark.parametrize("device", ["cuda:0"])
+def test_typhon_te_basic(device, pytestconfig):
+    """Test Typhon with Transformer Engine backend."""
+    torch.manual_seed(42)
+
+    model = Typhon(
+        functional_dim=32,
+        out_dim=4,
+        geometry_dim=3,
+        global_dim=16,
+        n_layers=2,
+        n_hidden=64,
+        dropout=0.0,
+        n_head=4,
+        act="gelu",
+        mlp_ratio=2,
+        slice_num=8,
+        use_te=True,
+        time_input=False,
+        plus=False,
+        include_local_features=False,
+    ).to(device)
+
+    batch_size = 2
+    n_tokens = 100
+    n_global = 5
+
+    local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
+    geometry = torch.randn(batch_size, n_tokens, 3).to(device)
+    global_emb = torch.randn(batch_size, n_global, 16).to(device)
+
+    assert validate_forward_accuracy(
+        model,
+        (local_emb, global_emb, geometry),
+        file_name="typhon_te_output.pth",
+        atol=1e-3,
+    )
+
+
+# =============================================================================
+# Checkpoint Tests
+# =============================================================================
+
+
+@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
+def test_typhon_checkpoint(device):
+    """Test Typhon checkpoint save/load."""
+    torch.manual_seed(42)
+
+    model_1 = Typhon(
+        functional_dim=32,
+        out_dim=4,
+        geometry_dim=3,
+        global_dim=16,
+        n_layers=2,
+        n_hidden=64,
+        dropout=0.0,
+        n_head=4,
+        act="gelu",
+        mlp_ratio=2,
+        slice_num=8,
+        use_te=False,
+        time_input=False,
+        plus=False,
+        include_local_features=False,
+    ).to(device)
+
+    model_2 = Typhon(
+        functional_dim=32,
+        out_dim=4,
+        geometry_dim=3,
+        global_dim=16,
+        n_layers=2,
+        n_hidden=64,
+        dropout=0.0,
+        n_head=4,
+        act="gelu",
+        mlp_ratio=2,
+        slice_num=8,
+        use_te=False,
+        time_input=False,
+        plus=False,
+        include_local_features=False,
+    ).to(device)
+
+    batch_size = random.randint(1, 2)
+    n_tokens = 100
+    n_global = 5
+
+    local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
+    geometry = torch.randn(batch_size, n_tokens, 3).to(device)
+    global_emb = torch.randn(batch_size, n_global, 16).to(device)
+
+    assert validate_checkpoint(
+        model_1,
+        model_2,
+        (local_emb, global_emb, geometry),
+    )
+
+
+@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
+def test_typhon_checkpoint_tuple(device):
+    """Test Typhon checkpoint save/load with tuple inputs."""
+    torch.manual_seed(42)
+
+    functional_dims = (32, 48)
+    out_dims = (4, 6)
+
+    model_1 = Typhon(
+        functional_dim=functional_dims,
+        out_dim=out_dims,
+        geometry_dim=3,
+        global_dim=16,
+        n_layers=2,
+        n_hidden=64,
+        dropout=0.0,
+        n_head=4,
+        act="gelu",
+        mlp_ratio=2,
+        slice_num=8,
+        use_te=False,
+        time_input=False,
+        plus=False,
+        include_local_features=False,
+    ).to(device)
+
+    model_2 = Typhon(
+        functional_dim=functional_dims,
+        out_dim=out_dims,
+        geometry_dim=3,
+        global_dim=16,
+        n_layers=2,
+        n_hidden=64,
+        dropout=0.0,
+        n_head=4,
+        act="gelu",
+        mlp_ratio=2,
+        slice_num=8,
+        use_te=False,
+        time_input=False,
+        plus=False,
+        include_local_features=False,
+    ).to(device)
+
+    batch_size = random.randint(1, 2)
+    n_tokens_1 = 100
+    n_tokens_2 = 150
+    n_global = 5
+
+    local_emb_1 = torch.randn(batch_size, n_tokens_1, functional_dims[0]).to(device)
+    local_emb_2 = torch.randn(batch_size, n_tokens_2, functional_dims[1]).to(device)
+    geometry = torch.randn(batch_size, n_tokens_1, 3).to(device)
+    global_emb = torch.randn(batch_size, n_global, 16).to(device)
+
+    assert validate_checkpoint(
+        model_1,
+        model_2,
+        ((local_emb_1, local_emb_2), global_emb, geometry),
+    )
+
+
+# =============================================================================
+# Error Handling Tests
+# =============================================================================
+
+
+def test_typhon_invalid_hidden_head_dims():
+    """Test that Typhon raises error for incompatible hidden/head dimensions."""
+    with pytest.raises(ValueError, match="n_hidden % n_head == 0"):
+        Typhon(
+            functional_dim=32,
+            out_dim=4,
+            n_hidden=65,  # Not divisible by n_head=4
+            n_head=4,
+            use_te=False,
+        )
+
+
+def test_typhon_mismatched_functional_out_dims():
+    """Test that Typhon raises error for mismatched functional/out dim lengths."""
+    with pytest.raises(
+        ValueError, match="functional_dim and out_dim must be the same length"
+    ):
+        Typhon(
+            functional_dim=(32, 48),
+            out_dim=(4,),  # Length mismatch
+            use_te=False,
+        )
+
+
+# =============================================================================
+# Activation Function Tests
+# =============================================================================
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+@pytest.mark.parametrize("activation", ["gelu", "relu", "tanh", "silu"])
+def test_typhon_activations(device, activation):
+    """Test Typhon with different activation functions."""
+    torch.manual_seed(42)
+
+    model = Typhon(
+        functional_dim=32,
+        out_dim=4,
+        geometry_dim=3,
+        global_dim=16,
+        n_layers=2,
+        n_hidden=64,
+        dropout=0.0,
+        n_head=4,
+        act=activation,
+        mlp_ratio=2,
+        slice_num=8,
+        use_te=False,
+        time_input=False,
+        plus=False,
+        include_local_features=False,
+    ).to(device)
+
+    batch_size = 2
+    n_tokens = 100
+    n_global = 5
+    n_geom = 235
+
+    local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
+    geometry = torch.randn(batch_size, n_geom, 3).to(device)
+    global_emb = torch.randn(batch_size, n_global, 16).to(device)
+
+    outputs = model(local_emb, global_embedding=global_emb, geometry=geometry)
+
+    assert isinstance(outputs, torch.Tensor)
+    assert outputs.shape == (batch_size, n_tokens, 4)
+    assert not torch.isnan(outputs).any()
+
+
+# =============================================================================
+# Gradient Flow Tests
+# =============================================================================
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+def test_typhon_gradient_flow(device):
+    """Test that gradients reach the Typhon inputs and all trainable parameters."""
+    torch.manual_seed(42)
+
+    model = Typhon(
+        functional_dim=32,
+        out_dim=4,
+        geometry_dim=3,
+        global_dim=16,
+        n_layers=2,
+        n_hidden=64,
+        dropout=0.0,
+        n_head=4,
+        act="gelu",
+        mlp_ratio=2,
+        slice_num=8,
+        use_te=False,
+        time_input=False,
+        plus=False,
+        include_local_features=False,
+    ).to(device)
+
+    batch_size = 2
+    n_tokens = 100
+    n_global = 5
+
+    local_emb = torch.randn(batch_size, n_tokens, 32).to(device).requires_grad_()
+    geometry = torch.randn(batch_size, n_tokens, 3).to(device).requires_grad_()
+    global_emb = torch.randn(batch_size, n_global, 16).to(device).requires_grad_()
+
+    outputs = model(local_emb, global_embedding=global_emb, geometry=geometry)
+
+    # Compute loss and backpropagate
+    loss = outputs[0].sum()
+    loss.backward()
+
+    # Inputs are leaf tensors on `device` (grad enabled after the move), so .grad is set
+    assert local_emb.grad is not None
+    assert geometry.grad is not None
+    assert global_emb.grad is not None
+
+    # Check gradients are not all zeros
+    assert not torch.all(local_emb.grad == 0)
+    assert not torch.all(geometry.grad == 0)
+    assert not torch.all(global_emb.grad == 0)
+
+    # Check model parameters have gradients
+    for name, param in model.named_parameters():
+        if param.requires_grad:
+            assert param.grad is not None, f"No gradient for parameter: {name}"
+
+
+# =============================================================================
+# Shape and Configuration Tests
+# =============================================================================
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+@pytest.mark.parametrize("n_layers", [1, 2, 4])
+def test_typhon_different_depths(device, n_layers):
+    """Test Typhon with different numbers of layers."""
+    torch.manual_seed(42)
+
+    model = Typhon(
+        functional_dim=32,
+        out_dim=4,
+        geometry_dim=3,
+        global_dim=16,
+        n_layers=n_layers,
+        n_hidden=64,
+        dropout=0.0,
+        n_head=4,
+        act="gelu",
+        mlp_ratio=2,
+        slice_num=8,
+        use_te=False,
+        time_input=False,
+        plus=False,
+        include_local_features=False,
+    ).to(device)
+
+    batch_size = 2
+    n_tokens = 100
+    n_global = 5
+
+    local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
+    geometry = torch.randn(batch_size, n_tokens, 3).to(device)
+    global_emb = torch.randn(batch_size, n_global, 16).to(device)
+
+    outputs = model(local_emb, global_embedding=global_emb, geometry=geometry)
+
+    assert isinstance(outputs, torch.Tensor)  # single input -> single tensor
+    assert outputs.shape == (batch_size, n_tokens, 4)
+    assert not torch.isnan(outputs).any()
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+@pytest.mark.parametrize("slice_num", [4, 16, 32])
+def test_typhon_different_slice_nums(device, slice_num):
+    """Test Typhon with different numbers of physical state slices."""
+    torch.manual_seed(42)
+
+    model = Typhon(
+        functional_dim=32,
+        out_dim=4,
+        geometry_dim=3,
+        global_dim=16,
+        n_layers=2,
+        n_hidden=64,
+        dropout=0.0,
+        n_head=4,
+        act="gelu",
+        mlp_ratio=2,
+        slice_num=slice_num,
+        use_te=False,
+        time_input=False,
+        plus=False,
+        include_local_features=False,
+    ).to(device)
+
+    batch_size = 2
+    n_tokens = 100
+    n_global = 5
+
+    local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
+    geometry = torch.randn(batch_size, n_tokens, 3).to(device)
+    global_emb = torch.randn(batch_size, n_global, 16).to(device)
+
+    outputs = model(local_emb, global_embedding=global_emb, geometry=geometry)
+
+    assert isinstance(outputs, torch.Tensor)  # single input -> single tensor
+    assert outputs.shape == (batch_size, n_tokens, 4)
+    assert not torch.isnan(outputs).any()
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+@pytest.mark.parametrize("n_hidden,n_head", [(64, 4), (128, 8), (256, 8)])
+def test_typhon_different_hidden_sizes(device, n_hidden, n_head):
+    """Test Typhon with different hidden dimensions and head counts."""
+    torch.manual_seed(42)
+
+    model = Typhon(
+        functional_dim=32,
+        out_dim=4,
+        geometry_dim=3,
+        global_dim=16,
+        n_layers=2,
+        n_hidden=n_hidden,
+        dropout=0.0,
+        n_head=n_head,
+        act="gelu",
+        mlp_ratio=2,
+        slice_num=8,
+        use_te=False,
+        time_input=False,
+        plus=False,
+        include_local_features=False,
+    ).to(device)
+
+    batch_size = 2
+    n_tokens = 100
+    n_global = 5
+
+    local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
+    geometry = torch.randn(batch_size, n_tokens, 3).to(device)
+    global_emb = torch.randn(batch_size, n_global, 16).to(device)
+
+    outputs = model(local_emb, global_embedding=global_emb, geometry=geometry)
+
+    assert isinstance(outputs, torch.Tensor)  # single input -> single tensor
+    assert outputs.shape == (batch_size, n_tokens, 4)
+    assert not torch.isnan(outputs).any()
+
+
+# =============================================================================
+# Model Metadata Tests
+# =============================================================================
+
+
+def test_typhon_metadata():
+    """Test Typhon model metadata."""
+    model = Typhon(
+        functional_dim=32,
+        out_dim=4,
+        use_te=False,
+    )
+
+    assert model.meta.name == "Typhon"
+    assert model.meta.amp is True
+    assert model.__name__ == "Typhon"

From 5035fdb40f959abbee135cbd5af9c09abf568a97 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 8 Dec 2025 10:44:49 -0800
Subject: [PATCH 13/32] Add tests for typhon model

---
 .../experimental/models/typhon/typhon.py      |   8 +-
 test/models/data/typhon_basic_output.pth      | Bin 0 -> 4861 bytes
 test/models/data/typhon_te_output.pth         | Bin 0 -> 4840 bytes
 test/models/data/typhon_tuple_output.pth      | Bin 0 -> 12293 bytes
 test/models/typhon/test_typhon.py             | 101 ++++--------------
 5 files changed, 30 insertions(+), 79 deletions(-)
 create mode 100644 test/models/data/typhon_basic_output.pth
 create mode 100644 test/models/data/typhon_te_output.pth
 create mode 100644 test/models/data/typhon_tuple_output.pth

diff --git a/physicsnemo/experimental/models/typhon/typhon.py b/physicsnemo/experimental/models/typhon/typhon.py
index 851fc42b8f..a094f817f1 100644
--- a/physicsnemo/experimental/models/typhon/typhon.py
+++ b/physicsnemo/experimental/models/typhon/typhon.py
@@ -35,7 +35,7 @@
 from physicsnemo.models.module import Module
 
 # Check optional dependency availability
-TE_AVAILABLE = check_min_version("transformer-engine", "0.1.0", hard_fail=False)
+TE_AVAILABLE = check_min_version("transformer_engine", "0.1.0", hard_fail=False)
 if TE_AVAILABLE:
     import transformer_engine.pytorch as te
 
@@ -143,7 +143,13 @@ def compute_slice_attention_cross(
 
         # Compute the attention:
         if self.use_te:
+            q = rearrange(q, "b h s d -> b s h d")
+            k = rearrange(k, "b h s d -> b s h d")
+            v = rearrange(v, "b h s d -> b s h d")
             cross_attention = self.attn_fn(q, k, v)
+            cross_attention = rearrange(
+                cross_attention, "b s (h d) -> b h s d", h=self.heads, d=self.dim_head
+            )
         else:
             cross_attention = torch.nn.functional.scaled_dot_product_attention(
                 q, k, v, is_causal=False
diff --git a/test/models/data/typhon_basic_output.pth b/test/models/data/typhon_basic_output.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4c616db8a6c60300ae8171a9ff54af498dcf456c
GIT binary patch
literal 4861
zcmb7|3sg<p{>OI@Nu|2!C3zJgda6|R`u#{Y*`=iD6ndA~>~?$SDDf{ui6kV1LQ!5x
zoTN3UM=BwNlv9Mfl1|9uc;xQQxo6<!{_mY*{Pr4itvNrx?_B#g<{IDW!B<e^aFmrf
z|JDpSN*qZ{<XquG|DZr|M6kawS`ry8u?Pv21e!<A7x0|qIljK}@=HY=J||crj0&D>
z?jJ3Q5QtU$qe6qCBLpG-lF)@>VU+(OYZZ~)@Zml^rAfrnk%8f%BKfaVRCoempk(^j
zn<^p&A35n5OOc|Fg7g|387=DJHb_AtQkur+@a6d-e8ttGo^EM!d_}(8YLRksaIjDy
z2o07*2p5V~+@nH5qe4TRBZ4I=qFy6BMJf`Js;5XzZ0^DDsT{3SsGKK-%EIHPt2XBJ
z>UP!UVZx{dffD~kp;2OKGM-Zp>Efl$N{0UJkH!@;-Lm*O+JrN*d-Pwg#6M6F5x#Ii
z=t2q4Y1mi#mlXf;$IeD30Dl!dm6s-$#m@!w=Zx-FKv0Y%R499QJSU6)Ht)Yg2_hsC
zL8weaBJ-fHk~F_8ey+iQW8JL=SyfqB@|?{7M*(4GbJ$CLFOagqPZ_1KcO+M97>IWy
zQ?*H*gc~Qs?4}j4d!+(mTolCdLupmAI&2z!lu36qV+=#oA>_gYYQAbO$+;aswQ^Qq
zMfP|M7?X%6lS{CqXFb!KV+Z>^Z?aEtD5iBHx)_;|aYGI9SY-_8_PJ4(&ND-U_vvKI
zMGh@jSEq6&+h9nUG0oi(M(g60a8dX$x>(B`OAP8sZQTa2_&tpdGC2y0BX%(srnY3i
zDGS+c6%Z2efVs1>9=td4$;+kss8e(k+tj>BJQ`EGdmACjQ;}Nd>d^sbQs8WWIUc^+
z4gt9h?AnV>#9;AgTGo0QZ1#1L^fGPwdPzK#S8KEH<&1Ia{Y*0B@NDXEzy>dEIm>j&
zpWvE12GS%&7v`|qM|iX{mXbD8{1}@6U04baor{U%WXk@Zt^pPdIRfRz?pS4I#x+&W
zhNHF|?5qkTCH;S)mLV(Pt=s}oP7%`P4kz|JHyq^3jB$coIJE3pj$<_U;NG*FnbUcC
z^myJ{EJ_SOp8hS?uQivJ&R4*yKb=r3?>OvyeU~iDw}PD<E7Wv*OKc{qki@uy<Z+5W
zs&4m&&+AV>j=l}yWkz7tsY=$x`4EZJzR#@gQ;0n;%mAJaVU&u7Qf2Q4Fw-Q84LYe#
z?Ou9e!)kTdojQcp%+DmgodVM2lSaHgJ}36h+lX4lFkIF<fgPS|i(0$@ny0=6<PC1a
z7WZIObQI9t;;BrX)n%yoM;5efRECV)In+AEl7#_!sAO$M*9YAtlYi~OI-DJWZzM60
zc5oS;ts6(8QjO8T`vu(cTTU15e-7epPSDrQ4%&-WkefUm3^3V9R4Rh#X*WH{4ndeR
zb|4$P><ws4e@zT$sguJt0u=Bj(w+M|VSw*F=vv8w;9UsT)oRib33A`N9wj|)B-6Wn
z*WhAv12mtMNVI3N&@nv*^*_CYkk`+dOGaGU{3qb1j%^^;bEVq)OWCToc654^7@I%d
zgkFbkF{6Td(u3!R(!)1+SbWh1bT3Y##YG9w`-&fXqWK&N+g`$CnkJGPw+1i|KWxX2
zIEGHymxmD<KnJ;t@wCMb_ED?`%3V;Ub^H_T!K8FDuD+4H&pA(8UMn#5&92;aI%a4(
zH67O&8qxth4#6_v6X;((2?C4(7d>>PXSOM$oiK@1wr(a$`6igVARTk$^`MX^q?{-7
z$>mKwv31}&()=iHtne2_;@OXb>C;|AlA%2H+jkw5(1)FpR}Orl0dnJ#V83o1J3}W4
zYZy%^uUWu!HXUL7!bYNzUkAJD*C$YXDidBd2P1c0IMwl83njzU={0#R$S4XU1j?D=
zpHjhAumKAPJJayNvmsh%Hom=L1O;m)U|Aw2ZC$%aNc1WUpSuLM-Pph`IKL4M#Yq6^
z{xs3Pidc5F6154^y~ceM-b`sBSxp<5pzsRdj-Nz5o{3<c<^<d^RtMv|lBj#70;)w#
zC8g1uaHY3D^Qv+eRc%V3vAMO3$>WT&!cs>%^hi8L)m?;X2|LMRdpCS@CmyD)TM7k{
zzp;{Ewg9}agbSsEL44^P<Ybo8RqJwz&(JGydMl4s-unzbjn5%1E%DTCjcTGl=4{!!
zXPuyE(n!+tI>?X)4>r~`0|j~~$(s5BWL~K=W+>bwzQ!6@`t~5RX{IN6;TjH0=Bywg
z{iZ`Iw-4PI$w&7-0Wj#fh-wCp!s=a>Y%-39@nOkiPeB93MpS}(wm#Uu<FN(FF0`eU
zgJH$3Y;{aLJ(rt`gT|<!YxPFrTyTWdE{^AJIy9V^CLJJLTP~edlS&%e6i~%NgRzdh
z3mL&?RM}_*cARQw1O>a814A20$f9Lrq^kp3A8ls$EomjK;cGzesVa`zyMd}yCxO?h
z{^Z^VAJ8fffmQ0S*ima9!;ZXUGUCZzvb@=nI&%hL<Es`pVtkC&<)6h!(FIU<+6PX-
z6WQ@?s-$t^esHVag7QJGtmU}5^rLnZNhs9C6B7mGfiMZ`OFy%BT+YG!P(5afZ48tD
zsenD-ehBl66)-^4nD%&Ogc~b%qM=+L=BiC9xqQKlTy6dXrYM|-TG8)>ryNJFZn*`8
z4_v8v7>AA@(F5uR32@!?59HucEmD`i3geTfI0~;YaKtSYD~Fh2`LU@O(5i*bf<Ck@
z_BMR{?GY#z8{y#qYs|83fVs>^IMHVk+8%g7Y}U%-`!UYMG(!$M`!_Q#3iAk=c8KYp
zKAz-s#lmCXI992C23vm&Xs;*N!P(0gW*u1zi$5r{mPuw%DA`Wl2qzP#-S*^B%mR#h
ze}_G&n**Lx&2X8HJhmTggrwOi5cY(D#S_iTJnv_as{5az*5Eh<tQyEIpPdgeadY8f
zZ2;uVZX#_#4)k=#9XKs$VV+Ju19@@U_~uzQWQwnlv`{amXqP|JuJ?h#aWBC7BbUy(
zXN2~<_}mOtTeKd0l8#?$gMRl^u$0u0QNMSxnuZBPoerT&HOGngL_V?QO`yo@i*vI^
z;_2oiFtyTv=K0-%BCY`(ihRagy(mG4Vt-V4+DYDbrEniCHOGLJL3pt&2P7LWf%)q#
z7=Q2c*o<D5bfMS?8O;dLzuuR-UUeVoN~j=VNki~qaiVm;n1}(pYfn9t=Bc`s1(bgr
z_D8RW8oA||M3<7RPgAL)*I_nmOdj;O<$!%tx6osouE4wc&rFw52@`F5jd+E`!qqpM
z!K!~CB${tV{jFQsPfbqX+^YtZKQ90)-6B%<IFS^(r@`Y9)v&mCHhuJfgQ4OH5Tx-d
zF>Fg<S3QZsAwD13MZLyj>p!&7@uexQI$6#1I$RHxnm0lJWdY1)!^!O<$n3o@W~=7k
zXWkkWv1%i&K@_P<>K|G|T+CK>Z-zD1wKKyiUm@e1`A1o7<89V!&u(}zZyK2xHBY){
zm~vARR+FCFM&h*3erR)I4eWk}bn1F-vdQ%(R(vwV9X3zNjLQr09k6Wi*2Q$Cs1Gyt
zq5$%52E%Y(KdP-d9P9R1k@(VWP%`c$iQS`yEzj?BRh#6n{F)Aa)T|*X1@Y4Q=^`D$
z<KWseLR}iN*q6``8>4`h&*u}1B^JbYEeCJdOraT@d62ba2<x`n6qc92Wp#H*NJPCc
z$o;Z{oOOK24tnRu^0P$PqxTF_)%Omuzjq%Z*7U-NWxdhpfH{rL^d-d=i7;4uGMYXW
z;E?ca;`pf$M$T3N==7&EJ9WsF`G$B=V={HSp2^NyG@nl198NrycQUEc??MBwjdZ#+
zz?Ik;=u|2uVeCfOH&6+SDjzY~kM!x5lxy&k(-+%KN73Yz=cMNHIp||G7O#`uG;>fs
z*YuP&**i}OZ6|ochgs*yJclzxS!E!OOL#&&oXp`<@*^^p8_&KNKOOI`J4m@@`^ac-
zbJ{WB47ik}fwQXyO=&R0oGwjjY`C2`OwNGzxzEX@!|q_e{SLQkfL56jvmb6J8Nw({
z70jqq2Vadobbcro^()q3Q&T0e3Dl=1)84@DTjSBfIuCXgrsMg^%2=XoNPJeGW~yht
zgZ8_&3@kejP6@8`u-7fnxN8sg4cCaF{Bb7Wm?4QjU+g&l^AqAZ!5gQ%%E1_=Fd|uo
z)OBJaxx3VgT0EQ#JmDl7-cm=pB)ysU+jGI)<xe<RlL29VgV8ANDJ)%nipjH9z+oQV
z;E|aKU~7S8A`ZLrMkaRXwnLqdh=zqo_r9u3((s!ju*E{q8=XQjFBEd~y&dsg(h?Y?
zw4QdZ--iX7dm%M`7}+suFH!8t!&<i+z>%COd#g4Zp5;tpo$Ag&M%6GntA0~iR^<WG
zb*u`iPOXHNmW5PVITIuYrX%Ov1e#Y{0cClV>y-T*tlKz{5#GvbKFv7gWh$m5Z6s5u
zJ%zj;eg_-fD!^moHD<1G8vT847H;E@0GadAd*D(Tug2lX;^)q%A7@Or&gXAOIr@LJ
zK3^=@Xcd`)Ydgce|9}5y$}#Cyf%zBnB@T^>kUG%VK$)5Lj|ky88T}W6tZc_v+gmv}
z*jicH+uGRKjJCA4vm9+@Z7=Ov+1fkU*x6ZIjj^|vsr<Fw(|@?s9m>LkFDDcH_sPSz
z?%YuJ;Jfyp&LMszeQU;zzoWd7_PQhe;O&0|erv8xz5`B^8vbs9|K{j_WPWRw`+vu*
z{R#7Lll~*>+v+p;4mCsihP#vZ)ocIA`L>4oeaG?8=$`Xe<^Blz_HQ=*4pjRSQ1=Ds
z!B<im_1F9X3TA(kWT!~7_OGk)mqOxj7HfXJFP-=0(RQt`{jaAoJ!Iz+(m6djvhiQX
F{u96yeAxg1

literal 0
HcmV?d00001

diff --git a/test/models/data/typhon_te_output.pth b/test/models/data/typhon_te_output.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6b85b2324d1a895710d8b0635b033a41e69f2f86
GIT binary patch
literal 4840
zcmb7|3tUWF`^Tr!Z4g}?%B3Qu)YL?Y_IeOH9pjRF3PnvdN!N*T=pVYEL@ra3q)3EF
zc-3A{AxT9^CtZ$&<Z^NgM>@$n!+CvN|L=R&=kx6SS$nVbUB9*Vv-iwzISLeda5yR|
zoNtOYN0lQE3-I>$a}^6+{X@h7A!2inrQ)T$faSjSc8VNlS&5Ke8^{rG+{OMu?%q7t
z5V6QNnCluOToxko^^kV-3-%9kU1`Y;lp8U^Nm)8bu(ah;FJYkk$1YrZU;m}zIUg@_
z0~MU)q-`t$dpIdb&+Y*sfr``h6vTl_vjiN0yud@yBPLLJ`ZhU14}n}vpvoq9cYj}B
zp}Sb*?-$IS8RQ`h5_(J)xr@1hs>ZVex#GZ{vjckt^Be`rDk1$&sANi^vT*$2to1m(
zzMVDC(?4j%QnBkwVNkGiBzrrRkCT_qpD#n7efIkXnQmG97(I-``wsf=^>AJ4EAsMN
zA@md5+nIc#e^}!G_h3C*CSd%kz(?}Z!DaDd0XiJ(Zv`w16AS%i3ukX<`47hZk0@V}
zSnMm5X^0~Cw3Vde%i_lx`g5$l)ga3%a|?Sr{y!JcJ}ZiZ?N-CuL`~wN%m6z?4U4ub
zKv4;oHum<Tyy$H>?vVgnR}Lj3=0w9W{vI@F=nZqY7O3-+73|V6BR2{(U}DT=k`+Fm
zCgB|D)7ehgD>>9|Mmj=5EJ;>xBYN)>XiNKcd@@>yZcX9r68qy&(=h-#I|6V^&LpzA
z?N7$fDHoh>)Pd6Ar!=+56+PUzg{}cBz;OL~rgwo5odcAh|Cv~rad|0u=r)Xo_*Gz&
z%VLsO=SFL;DvkfY&@S@uR2L~JQ=l`sjijEQp%-V|CV89EiRzPXI_|`B49^yWPQy+-
z(sv*j8tTw1+BR@AQy<r@^Cv?qhv5bH9V8__kL>?z6y8@iqaATC7`=lm``FMIN~0c7
zxtBs5^EMY!Z2n}N9FTcM3NiWQ1t!Ttp%9x`NZ;rPZf8VH{k6X1Tg#VV+|zCnlxYnU
zZcc-Ari^{m#GtwHJ`7rO9N&gL14%s(yl)uL8Z{AWJ-x)}{=Ad4_{}VsZ#)J3+AHX?
z={M*|-$|rqvoW+&E*h#|B&NBym_+X<G_y+{*v34%z5gzlT_+Fg-=<TSs2AkniiOZi
zXb5Zl-cZAtsWe0<koZif#?Y6ZbX&zBcJ<ykI`6a#bpFE9tn{~xQLQzmIi{gOeg*k;
zgapsDc+q}tE#zeXbztIUPmD&&BQMg5RF&E=6LmxwekBK@HFh)e2dTrVsmZ9NIvES6
zWRm@yD_~Jt%^Fy5fYw*#WNbrUR4vKERjTH|t6C3NwwOWe%B^5JXc04aS1zQ!uE(H?
zI&>_~hCY5AxIrW^H(p4)&lk}ug9zHaz>3C&%Z=}K^aN{=dx<<f&ckzR@o*vA6Gy(L
zBzn~;YL>JbQVReb_<gavd@8oL7r?8MR2-+g8&_^#k4F^K(eKx-(7D|dB<a>fQg{}o
z9h6}B?iVC(y(78=X~P6JGZvm4r|%{z(1UZM(Aq~4&KM=&1cyZE*RmfiQuYBGyaP1#
zhr*CMMOfa{30W(F?OE#xJS9V>-??&HlQ9Y==z0=&M`QS%&%^Nnhha*PA=KVDMlbTx
zQ0Y}NZjo<-MXHn7X`NxT<5CE1(CiJz9!!C~J)V-ImF=`&&sTI3_Z_SJ(=sUO)1&bH
z+<4&WDgtzQ;M>~KFhlhLopB+KeNPg}n{stLm#z&R-Ls)Mw8?gc*D2W1TMeBA#^~o=
z366(sn7Koi(ep3okO+%t98x<S2kS}o)DNO2xp!IlLN0FB35M50MVRmJ3mG%3Xr|^m
zaF{<3%(QFil4f_(yT2~h<=LY+G80B!nShq72EfyM7s>V)GbsDZdTh3FCvE2#La#ew
zVx2A?mP`hdqiaA;=OtZM!O{y6y2S5DJyAJa$pn<Wq|JB2ZJBTds?>^9CEXd-whLLm
zXL8t;V@``oI|_K7+3;4Yj#RCUWJ!S!CfP@V$JC1;-oj^#mS1PA^nb<xJ0Dxy!AhvO
zw<jKKYb7Uw9M}i)V<7wipSt<@GC9AQ(1W|`N#sOlP<gf)$FgS7;-XLDYg1|5F(2$P
zw~3wB5doQ_&cbh%lOQW`0dqjGfzDR+A=XBhX!Yu5Dt;11o}PV7^ON>5L-H12q97Yh
zJI)Yo@l5c%egLaZB(ZO&#N&}=J>lKm;m|8NhOO1q!aPrZ9Q1PoY}~=2&1Jw=8f->q
z%@w$0swYwJFdg43A&<0Q*+;vpN0Ro6i==R!^t)(KRFD=K&bk%(QkCLF+G-w0O*ZX=
z{DsQ2eWwQco|;I4bhu2$?B%#<!8km+q?jsB>p{{kihw8dBFS=(sb9}Yupy%evXc8S
z6PEA5{*}ofj9h}XssXTfml8hmFeRlg4Jao?eQei$MSN5-h@|@rAR6z|2v1c^+(zy$
z5Iq?W>8Z;o^I|@v2n)!@($&BfDPnY_3g|1^V`<4~=oRdb4R%vuK(Zs~dJ$;px0LFn
z$)kOD4A~ifnO%6~346a&3pCD#qNU&kHC(x$gvY#KQ8R}oJqyJ{`xk%*TS2=b73hO8
zYjB$&kv-V5mD~t8Lr>J-0?`X~mYrq}&$q^tKlqlY!av4LxV#Aj_w=!>;tb|@@)<XH
z!&a?~ChfCgp-JmJbsTt{*?z?oz-cv`V^mGD=_z>Dm_qW*2<+Vui`;}%Dt}8A%!;n!
zlJ|=t;k9~URpJA3IyH*cuZ;uA`<wJt-e0W3>}J-8Q$*HFEMVk(F{aHmU<S@p#^N}I
z!a9!%_{+-zkEF)JeJ5oS-PZ_5yQ<Me;reXGD_uOD5{E7mSbCWG1tR~lAVu+>sMe*A
z6_2mdX@4e>haFRKVPsD@a4dyb?!E^d8k5LbcTb48tcAijYkV`X1k)z_U@x!ppyr%{
z=XUX-@2+(P9PXj~vi3XFExkH_GdmdX=UTyUF-EvNyDx09$-uns^-wb|0DYE~(b%vf
zFsP3NdL+mrh6~A{pr`bCu`Z0sv4P<NE3{Cr&Y#oZKvfwHNLF;fDQ%Us?b=D&H~l(B
zRaB6Q=;KsH)K2%-wbPCP{jsX;7M^PifW_T_3gP--pHK-;)6As5HJRk{{?$<97KV-*
zsjQx63)#FTjoSLYrk(GG;IE83mY$-v3i=w-ceDjWGbY2l>wD4DRfR-PSxYwEjDQ-h
zF{)t{8B(?ayIFUlYv2kA4u4?TO9@SFi3j<y9Vn=EhYGn8qP6b@)+Kb>cGYNM7XJa9
zu-HTs^dHlmZL`rLe-f!oiw1AIbc|Kfp<TYeQHhTm_|Msbt+&si@4{&8$heJz^Dl#u
z_&#0G5(Ayv6iAlVPmJ3;GsaG0L?);2q&CMkV%3Ao*mCzE$$Z{O59OUlt&DJDHEb&J
zQQuCVIYq*?y_Luu+DhJ>3ns72keOxsh<Sd%hsjUPLX)(2#QUfb?tZulPJ}ejXrm2S
zyw(Aa>SY7}YB@H9^1-!cKUH?T2y*^2;niCS)3eMMCBNUM!v^kxOP&I%VP8T{D{rJp
zb*D(J@^;oTqn_<|ErraedqB=kEyZs4$>2m{NO9o`Y%|>f!(U3jyNeZ&y*wT?!^Qz;
zUpPB!ObHElnSu_H3$bEI9I^3JfzX=c=wLDmILFX-L~Q{XYZ!@Fj|_$bk3(>?vN6=Q
z&%=nJu~^Nd;-kDc*nIRfE!C_7>)Qg5&vB-;V+KR~h6sFUGnZO=8Gwq36FS{=g3t&Z
zbV*TxPL<{OuevE95$033b$n_zuboC}=8@<_&QJub?c=y3WWf6Ym@{QQcqLcU)=SUm
zT+RkEXmM{QGbfUaDBcR8XBT3dcM9Wj*$Bl!&e&}0gCm@GvDb=yp|y1#G4;QV$4|D>
zRfUt`kGeZxWx5J37;h~&``!nwbffUv*ng4a1{>NaIu9?FY{6-^<xKSGH>A9FJuF|a
zkm#JKr_CMCOjgo9a2#$4F4HB{E!YW1H2+mlQ8I_Bp(RFzKc|Nk&ydC8miRE|9@F5x
z5?d1D$^A>GsnFdA+sv9sTW1_h)RV(d^BQ&<kHu5L1Uwdqknz`s^7m<^F(wHXhh@?8
zR<G$KIXhCUn?RlR=fdu5n%JT~l9uo{;`5A!6zB3_jfNXYvP!5&R1TS$TSQH~l)ynh
znsmwOkv%n6+122J4f~?#vQaPbhz1YFac!Bq%e7%pTQf5>CYE(f8cUm9-_z&yCG>8;
zeza=TA|h#zBUODmnDS${saEbYX!YMpLSh^+;?`-#GNYL^`pCocr|X!bR@%(TOA<^o
zjJ0)JScKUVEofrBI@~vw{zuw*9?kZcgZF`2V&`*<)TiITZF_R*oxl@xbrN^HU8yA&
z#hQcpJw<HkyO&6AW<xcM!J9K08Iq}oJ3G9wv_~nej{k!k4BG*o`L=M&U>C6f5zLBK
zz@cxVsk4$k6{}rBi>+CV%=zr8u$>*zi^Gw{kDbo|&bV)#&nHJ|{m-q>2MaceD^qYi
z)ob2=_kTtl<8Kx4KA5jyVUS4bKvyr7IcZmshrQkK|4k6zYK)}~f9zN*KHtV_wDssw
z7M9i)qxhCK(wgO1ORLc~Ru(oEmR2&A*3P*bMo8VEEF1-LGQppA9zJ*H+Oiv8lppRx
ze2w|siR*oV_)}W@2J@A%|C;u>tseFTZI;yPe@pwu%YP00+#~CJ0j&NW@RK+Hn)7+a
z_5Xr1U%IN_2>a--zlMCCGy}eXIQIDt<nL7dn)3NWHTr^5{XNR})6G$!q-6Sc=OGGa
vpD?lwBU$;+S?xp4a5$lTKVFyi`*15-<755f28^O?3qsmQnImievF*PBnZC4r

literal 0
HcmV?d00001

diff --git a/test/models/data/typhon_tuple_output.pth b/test/models/data/typhon_tuple_output.pth
new file mode 100644
index 0000000000000000000000000000000000000000..63329a08eb1b228a4989ecd9a63bbb847cbde558
GIT binary patch
literal 12293
zcmb7q2{@Kp_xEGW6hf0kC}hYy<+<0Egrbr(X;6l!lpz|N2Bgq*kR(ac9EsAzbFWpB
zREAPXk`zhmNSfzQ?|I+%I_Lb)`G4QHpKD+Hx}IzA-}*i4UeCSv+Uw`(E+Hup$jAu(
zN7E2U3nEvC1&1#4jf@Hl3G@w(iVTa2G!0k~xnNY-k`Nasae<FdjQHAcfq}cgKQc7J
zKX{aHROF(N<#N6efqqeoLIQjv1D7oijqqJzAr~%YU@)^!ua)J!GZzE}hKv8cM9w87
zbV20Q-@lX#mzXKmJI5?sa;8M@=pPmpE;VtOL}a-16nBBUxO;%RWI}kKiOFK_lI~&&
z;WA17{-Gfuf&P(;LYFO<n;a1k7!erYyvRRNE?jn`SGZhcxV%@mg1cC7uk~Q@;NQ{B
z!}|tHO_b>q8Ls#{YP!30Lb%ezcnNpupQy@7!9N%Giwsxs3RjI9<>}r>MyhAZ*0x?o
zKfC8Y`b<|K|F=FHwJ<bd>4He#6@d}Udzrd8_33rpznb|^@N=)4HvIhAKl?v}>j{kh
z9{k@P^<5CMC}`Qzz-5sxPWu1B|F@Zc9Biz95@0LCpNsbz|JnbUz)*qZ-xBa!9T^z<
z)AcS+rvI;Y|3fHbQDkID;7<-4xz@qbz4m|hf99Yru=raJKcmvr%*AQc|0jX6vnNn-
zQxn-QG$J=A8$N93eQ>brLr!NVK#R9zY4b@19If;QHx5@Ip^htRq+URuJ;O*a*ivZx
zpbZ16cyzWofZzFei2HIA+6GTztF4Q$GUYgFyftTCoz)PV--V9`%F^{NT@>7Acs)3h
z7RwS?ju6nwvct5=DjJepkasa`;mzBV;qCdEByM^Za$3qoTg!F0>}%0rk>1M3jZ<M;
ze-OUS5TnGDHdGvP3kJXK!a0kDv~t5DDA`g8`s$|ST*ZsD)=gv0i>uId??lQoxQ%s7
z6VXrQF$TB}VK4WM!(m~es1~!6g)Ubj8OMQ~r(OnJ8h9Bj>QAsy@t*u|QHAt=kdUn!
zT};jzR><CNVc$%R*sJoZe6+?{Zdsx=3QYR3dU;vwo;?tXcAL_?&@7y&UX3yLsC1D2
zC}@${A)4D2PBA^&_^!qZ)n2)AN!D{|Q}|UbH(3YD`$$u*?@l~C@f4+MK1RXH8(<%r
zhx2XgxZKA>s5#f0t$dS%+i!7{KO}%|O&`J9uA6W+7CRs%=PI`#NWezl?)AyIau7wn
z6_t&(XTq$3c-TXP$20pd{dO6;9`Xws*nL51fihgb`U7>7W3bxk5dUP!VD@%%EDcKw
z5E%@}7X_Vq#=p1Sk6V#Mr?NKUiC?Fo^Ug>7052szP5S}-_GAh*KMKSz9bIU$*ozbc
zlu71h3tqpnoK~5h;}U2c8Xmg?`w}!Ty*>)PD%#+j^8s+)<x2f;F2wS?^*GE`B)aJS
zQq&M5N2AV_V&Q-`te;^7#no%5(tIp^Qp@A6ja!K?SNW6P$9Pz8Sr1zU9;8{Yjnc2?
zqTdkWlb_~rel7NF@V;d3rAG{V`|t(VQaO%JsQb~^`=gkSmp|3sm7%bl3FtC%G}S#n
z%dcqY!xH0tX#Rw5_<G|Cey%ekrTVo@MtDP%pF|irdN<}e<wK5PD>lXKgU?6LbG6q(
zaig;e^+?E*+v}^86QB%%7sM&|s5rId@5hak7lK*GK$2N-8|-EsraEs)w0QRf^`kYJ
zlbV2xgSXMZFfT~8dxehzv{_?U5kH|+0)wwC7isJe=h|oQ#>oddpfJ`6I;I41&LLVj
z^tCjh>n*HLc*9wK_opw93`or5G%u{O#*B*+m?NvgR=tT~dj=iE?;!^$UONl2E)3>R
zI$F~7Q%j&xJ_c4itK<(=1>yQhQY3IPWHbEZDSD7N8-45v8cpVy{P4*ba#fbHlB8Mr
z)pk6;Y74&ATuMv4RY^rvnaNe}K$j>UE5$cZM$A`Hd2$)Xd`}lH9JdFyIzPhsSM8Z!
zA2q7=Uq(hYz{;;{<GDRcNm`lX??@={%Xag;!V>{}4K2nhrM<$e_;k8fk%Y_Z8o2I3
zV=$m#J085CN$1}cpoRQw?n<3A&2xPM{hc+KnZFt}%zVTB_z}gAQ?aM$-M2t;Rys}{
z!;#_gV;Erd47*efaLolvx-iR)*y2#u*q4#vwej?3usjY@Y3B6mt4T7`2P6fzLG4Tt
zm;B*+Y3#mDxDgjq^80&`>#s_k8ofQ%jH295f~6iXL6aB^dz*;TztrNns8%#C5YpzU
zn_<|8RF;_N$~g|M5ta8jkNVQ#EdJy&>K>p<UfI)G(*9~rqp@5R|LFx6BqyNlGKDB{
zmGJz!xp4XAP*TgyMXBbE6!yi9j)zymyD`J*>2OzE7v6_OD-9NHbNmcOvd6$HV=gY`
zR?*m&S~y>DADwcyaKhCSsVny@6gSPqF1aBvHg_=fya;2mIGnzVwSsty8A@*m!|wRG
zbk^3NjOrIL%MUi>k!!`&Tc2W4|0y(V^e8s(jw!F^e-<69q^L4121A@S;+(WnOmi7W
zq33f^BG3dI?%v>|woHIwIo=M&H+J$Vae44*vo__pX*2!vXJ|*BAvHa3#w?{8NaebC
z^{FXjn6Uy6-55^yRf|A=bQ2zsu%irh59l=1qJ{I*Dc@iczRmOG&BC4tRg^zq-KMW#
zSkU{9yJ3$R*{9L%&^EYiIEjR?4b9J*Q=#ZRW+shcau1)Oid71AsBa_@8FA0u6{$ha
z2~B?VW1pH1fYW|C_`c{Ao|rffE5^U$EWSm<jy+3pQrHQ&pcclCZqB5hG3i|7I}v1?
zEWjbb8{yoVQnWldopN@{GMm}&@!>&NHd0|YFSwoHP~+qQ7M8zK&;uo^j6W{&t{F{l
zwjo*#&j3I9xo9Cbm1ds2De}Ck&(`f6M~P+c_~O^5BnryH&i5@4ICUD+Sz`!^gX~dY
z^;R@?_zT>cRfOrE=FqhM8f<P>9{u(>9DNQAVuPmsz?Mx9MU!pE(?;_M8udvZPwp;-
zAJex}hwDme(Vv2`o{W_p(!k)C#ngGpnn|s_$u~yK;kn}hRE(2gRq_qgI_pHu_Br6_
zd#5zd$AMaI4<_C4d7!M_Pqe}zl@#Y*g#PjS`I|9)*~^6k@yME8SYgwTo@dQsOH%95
z(NIgMSrf%Hn>qSXbsi6PE`?Q%3^%UTV__lbc*$c0MMvk+@X}&_%7{FOlTD>lImhsL
z+jAK7tQt#a>XP$~G0ZqPnNFQ!<fPJ%WF9=gkxn|ydzmzSKe>^T8_t8?+}rrb$AY?x
zBx#|tD@>lN#0JeRLt~KvG_CAL%|RdVzyn>TcvFnFtQIGqbqP!+O$qLQ&W5k~MpzZJ
zj~V4VQ)pv8uX$Di&c?VhL2C_!c(#Mo>j1iUOoIBfXt5)*BWaqsE_!Z1&L8p$q`pq^
zRI@x8e#?kMtBc}%+KVM5e3&VGa{LjdE|kZPrWmquUrsCQMzNWxrCgSV5*gOT!;roD
zl;NYmVtic4-e(Dw-yFi#TnVDAoWa=aUyEWB#j$+-Xs8KDAuY|mDDg^}qP``A<bHiz
zbN(r|sM-T3(T@hlediCq>JRsIYvAhacnGYJCf)oXIJVi6hJDLMyV3-#zA_pe^TjA`
zm>077CD^95ni3Yhfrbg|m|l%1DAWw5(PEWYqWY3gyJ-d`Mni=xJ&WrIH;1v@E7WUe
z6tJO`Tl~EWgYD0VJ||oiagRsS)fMslN7D@Yet3tdq+>fj;lTlJkai|T7H^{Dkq3C^
z$#U$Rgf~re@&&s|UcIr9AT*BaLuDo<xHYLA9`$(A>)yR1*dT?UX)4PKUYKI#I9trO
z8Ydhc^%}LdJwW68vNXEk7#tK^!hIAsq_Kzm`A+e}T;*CBx}RlAv2F$U^-uv^zw3x|
z_Qd0o)iLC{#E$iZ?V%QNTXI_+iVgD%!84(V7k|7HWeOHxpOP^A_>N<ry|%(ZZyB-}
z8A=y+2Y}8SHP$TI1(&z=CC`>UEbo9ne+oBH&xR}LUHuy$b7=xow)o6<?jJ^uNy_{Y
zwGy6-lwmHJn}sshSMy!wdaPY)3=B7k$7LE~Ea1l=*8K1mepkMXw(T`wwW$-dGB4w<
z+TDD5-4x0+7yxpHtx(d|j@FklxVINFK=W`c-1HWsQNynAkAucjbNNwFD2d}^ylou{
zg>AUCTa~_sj)#z^-ng!R1!t9ifwO(x#6|YGjhiy>fM>8XPTw`3yrmeQShEODSO`!Q
zafCuL_p#6Vufh383@15pAoZOQ#7-`eLu=E0e2G&wx9WQXem%DW_1`#AamoUr)qP`V
zJ0V4fW^kmIUWjMawP>SG1z5{TGV{CMH1fxECN;|x<L+_jcDn-Ctcr#6>lrnqM!=#x
zCwk=T%7)h-;zn!9(ha{%KJsk?*Ry&VX-ICukL~xM>V^e-`^1=>Yeu1=;2!KNEJN={
zdT?}nt5EaY0Z6#98hnGyNMfoNSxr(Gy^0x5ulJ21!>dK8Jhc*)13rS|w3%S!X-l`X
z6G#;Q416+2u<dQ*$gb%&&X+M}?b7csM#7C33KdY(e+2uP`9uxBJuOB-Ao$t;x6H@x
zpEDnG7bmO#&&=od4D8d`g@}`GbF%9cQMp!u#^znZ#8*ZvW9c)f&|XVZJv~@JbtdOC
zSOZ>iwh(ZuMl|A!31(eWWn+BgnM>d$Ui|A~{!~#Y6cq%Mq4hB8s+~(Gb5c;4;=)ua
zPQlt(0UR_KO1ITkQ>Jt(xa5!ITPloc{O52m*|HJ7M-N3&Pa>~3O9>RM=cASPF0^=~
z48~Kc5vOd%rZvO4nq5A0G2Pr@;)b>OQB{fQIa^VO)@JOHpNC;S=P}z@l``i$(|WP5
znAzpPuDvv2mKw68pDW8O65Qx%$}~)JQDv(v8o6?5ckV;$edsz6j}HSIFloCuOYRb2
zq4Yj3+Vv((c`*y5V<j={kR=^48iRUIiF=%N1KNL7g54Wkicj?5#2@d6=AkNVpwkKH
z8D9@-PW9Z653+1sVJ+U7-pZ>Vl_sb9O?Yy=1WoKO#iVs#L#^Tzc0vC#O3r=*>EhO~
z%93DV{X^Vcslg&f=0LEt3642q#gro^;00}YwtM6hmgJlOXr{qD({1V1okl+QOAOAa
zoJME!Q~CYzQ}N3k8JgMG3wk`<F+ch<+_cU^dzCpXioM2)(aoT6qa1Z5Oqi{G6b(w4
zk9F}+QG0DAcxPL4k-DczFlP>TyG@V!?0JqF)=R+1?;$F0^A(n16YM?{O3D#(Ok@2H
zm|oJx8yDMQOxSg9zWgyfRUQpz)%P>w#XmTK_AtB?^$><nm#5cRX7tc_KRi<wXNmpq
zpwG}y5ZSlj5Tj(c`d*sl9+zVC>$^F1-*Rv<Nyg3<9gtqTfN7`orIyt(rOS`a<vbtT
zvBGC>dA+fpp-jvRIs+B3Y1j|kGi?HjA9APd7qppo`bhlf@CI0d6_xyY7cA|q$Y_%|
zh3AE0uu&xM+VT_vU<N1o-HqEkWjN#2viY)09bh#ojazfBH>asKLzhq9yt4C=(qFZu
zN!ia6S9J;bM)l+T^*KeXO}-N$auq7QzK%iV$-VCfcQRYl3buQIcFs{{jk`CnhSeI(
z@9lj~OELq@1N5kbX4ChX6Pe2UXjT`$U$m<IG1i-%Mx93`@TI7g_h=4fzSmA5X^4T?
zti3RgtHG{&TKuoN16iJM5W~{nxbwTTL60utC%JAII(-_%uF=H4=CW+m03jap9*GC+
zF2gC0gS@ikL_)J7Uh(ipXt^|vR_tlu<QwdniD?F|a-Am1pD~dxomIrPm5~_I(~3Wg
z9O(4<`D~6#JT0EE6!*{agO)5^N{L%YsdxQA<N9QNO$85Kb7GkBhBBO@`;=dBhoEqk
zDm7WWMWNAFt}b&d79YvM>8TQIdAK5Ve7lSVt_UU1ccEs<3$FUw6Ko1_<nEg}vc`-S
zp1)<w4n?h@o`wEwQiwQRZWzM~Zw`R`E<?JU^Aa5!C42W=X-d1S%uhMgjup?V`1uj?
zz0Z0NvlBIN&$cciSD^!p-c4Au`WLuVHJw$S?uPB*y*bEjaW>YbFKs{H4psL6uXW2H
z+@6d#3{>dVSR4A%x(k()mvf_|)hITn4=wre8P&&+VNVloVTNK5Os)5&0Z<307gRx!
z?<}^WaSW7IxuWCVX;4}*o=QWCvCx9U%rOb*z+VH6W#<v=F2U4kvNXoSgC+U|@u^pQ
zS=$Xmlr(yP6R(?K+bu<$z=qP|j(xpn*oAm+W*A0J`-Klyyvh0WapRPYkAhN=64@0w
zu&(JYAbGJGFDg3;E%P^n;fMaTXqh~=X!obB(+${GTQhEZ_-3~5#04(PD4sqhK7c`w
zO!yVLR@CqSSbfts+#<APt<u+E&N?g3v3roi{2?zv=jKsNpDT%jFYF>2H7&+x9tMSN
zH@IoT58&yWKlrn;6|nh^9!r}o4abA**xXqyI3*~YlFCddO;3x4h8%#8#~oN3jO9<R
z9Z%D0j3BjvL*);#a8$IFdyXry$?Z8`dFeg3>6r@q8g9;KSj~f%9S2}lXs~E`ksWzP
zgz{DeH~INZjx3{I8*+0KV9fkXSnoQ91xAPR0WL4Ge6I`Jbt;S{RBXVhD;DE0=@D?T
zvlXlsIOCKDfmAV7g+9mJMkmuMzDhKR=?%^2dP4p1@s&humKsJy%ay6z*^6n!N3+_3
zx!BN0pKh-BA#yx#%HFFr!0jK&)D+dkXFn-}hTKwgIuJ+u+@|1ze1DSlJ4Hrbov>_f
z9z1zpg(F8yVTbx>!49Q$;4z^P1}Ukq_Rdpq(Cq|2`)xd$rwQ>%umlad>WyJR=WzBa
zd)9PSo{Qg60cQ70xyt8ng=d>mMboQnm`ufE92Pu<%*LNbL1G#-yToz9YBQjR3xcx6
z%5>q;b+-LV2c{ROut}yuW}n-MBfct7@SHrpsd5$js3t?2@*&J|lMVjbZ!b0|e1Mfq
znN4_J1dX&2x9c3{gttziwas%-8L3JZWuM?_rWx%m9l;Ee%sGC}ei*QYz*Y7NKddCQ
z)Jh7N<}C?!@|giMTmCEPkS%}Oz6m=P?8V<26fq<dXqkgH(`#Q3&BxbKXWnyuc>iBv
zet`mwiI8R`R&@~LG7giHW$>31GHig33Y$@`2-0u#VX90vzTcqChMrtQqiPO<$JL$i
z_=hQPt~QvpoxRCvel?=WZtw8bK_t&(iJ}ExukZ^!6FKuEeQEOK4cM+{O}n>0;FWL3
z;K5>RG;Hd<8<B}6@w~%msGv#K52mpFeMeD|xB*FakD-m5=YvzN61jgl2l?yAlFr$s
z=qdG%KNWQu?6RY1`bAxKFy)NsjgJ@%_mbw0ymn-Zv}53*|4nEbm@ZP9wT1GB9^+80
z2i}`)!*g~A@urys^FDHnW!I;HtJ4C|3Ox)pO9#-f7oB{Sp&~_e8L@`1(iEoD#m9ZS
z4>J3=3zx-cQP3|AtZdUWzGUTGz`2^(Io%f8Px<14u8FwGLY=kuKg7Lv^#qUakz8?L
zB6!S*W1g0^;J2*@;?=t$W?~69Q?nV(j0}Vd@@-hPZ#Qk1PGFe_1XSC5HuSsXLX!?j
zF}Ks|{AS@!-t>MhD(|}|%IYJ_zi<2uceE>c#ab0+Ty4Xir467O9b1YU<xA(Do1)aD
zD^RgllD5~b;v3Gy2+wT_0JBUBzTA8k45-<OF$FW}#O@2|zet1D#ht)4M&2y>sR<et
zM^O244b~VShNBdmF#72XI2&3+ACKIC55AFXj?ZPGs@+KP+j5w*$@_}tL!wyxDMie(
zxPb|?WAX4WMO<O=H|UZ|f>m?n$WYdT3m%??*W2EK-;pHd9Qzz9UJGbh-Y&Q)Y=*QE
zPGGS_4z7Nf2sJJq^ucX1Oqg!~-|Lz{c|uxgVqqFcrk~)NM_5CL1P{(Ttii*6Bo0`b
z())a0a^H{n(cF#^%xT#Puvr<$*M+E%!%=H)=Ddf!cZ$KnobvrprBZ@+R|nF8?HAz6
zQcu|2&2vh>WkbwKYqmRWB?~TOP~K?8QWEm9ak&b!bQyx0SGGb{vmSero`&D%kEeHy
zgQ5Lk8XO(@67PL^#zjxwNQNF;>3c>Q_`OMni|)-(IieL@g4;n&;;ASmi-(y<?D_m*
zx3Jr)Le&0bB~#ET71k;>VzG`Zb6k?<P^M_gjK*!k!f(NJ#rZaLKU#)Ue)VC`4j8lS
zrfOU#W63ThedIS81fqnx86;>=!~Ms$l7*Wlw)G5P6$ZPx^|IU8#iK{j-ztUAU)|03
z8&L$Iw+2(bMhT9LuHzORT7)?hJ=uWy>nYvZ0l(J<V$p^EtRu(?=5HBK_ft-TbUDwh
zuydo@Q?Bg$d?&d7_AXW@s)Dds114{<hurB0;O4l2Xj2l;=d3>J5Ha~JckpZ(9v7~F
z1s|u9va%JH6ox>_)?&>4`W|ZRgZRaB(xLXHC#^dFlzTKf4(60>5S?-xMxGOfI;>Ke
z#1DQN2X6Y#tVbz;%nnF!fu&RE@OzGUi*anL_5;+;{=&z#O0#)fGT4};;1*j+X0~7>
z>lW)vX)C3$yk7(Qd1~+)eS+D%o!9w`4)xr#vD$#IoUz-y0mcq?VD+_2m}2Qx-tb5t
zw(LkIO3rWQ+dD79;%kyj^eqRw$2*9c^g8hDXKVKS!8vr?w@<j*REehkS_M|^(X{`*
zCq|FC2parvyth^;+D?!p3;AInts>7i4iT|SQ=QnxIDPoMhGB+sH2+34fPzFzSaUyR
z>VI67JwB1ZEnZ*By&gN1^}VhGd9yXCXP6k%+nvhJ-+Ih%o4cI74=~}ByE|au&7thb
zp>-JFtiW}se-=JdxWJ1G^{GC#FU@=F2xB`ZvV{s8xX2%BO#aj=>fU<{%}Ulm-JmP*
z-l#w1<qf9$%E!QL;$ALm$rZlRzXO7w&1Q}5U(tA{D++ekqW-`XoG3mGB7OY$gQKtZ
z{*SJ53jO_P#r{Qbb#WXmJoFsWK1ws44R%;!bPv7mI&oHGtD&Lq1MYT%B*Gmdy1D%;
zZk`d#jFgPob=4hQXTQ&2XmXTKRFdVM-YekO-Mx=l^N!JWk8Cc-w_j=BB3-nrn&;4c
zph&o2vIcW|{sa;|7nAVUFMP|EOOSfhlU;st8#LC(p}SHX>%N{->TPPp=B$+<;nhT{
z^0#J!2`4UW@_LK1H|^PeH6_;g^ezAVZUpVpP-o4lKDd3-DBAjME}f1$hoYeA7%*}$
zyC1rV#4>vCDI8w%Bg(XCuN^}5grn#gl|X@GRhXmkKnKka_u%vuF|e{o6%AF>q%iX&
zY}nS0bVZvyDz>p;xftB^#DsL0$3pfzV4Hq@g3Y^6@n_dLgGKit9N41;xwkVR#=8*h
z-tA;F#I0DozBKo!asX2)TuaMekA$3{BKG*pUS9CLm=eB;AnIr#-Pd=cJ^lB>=ljP{
zA!i7F5;KLZChAb@?29hbh7t}vg?{#RsBvizNZm7J-w$fw3ITFHO-j`GTL@iydKCIz
z7*7-D>+vC$!`ZxkKX4RZggVx}_hmC~;#H|@IQ-Ri_WsUBy#8w|KDqgdFQ2j-r)rI3
zDSNg++^iJL{&1d`I*?Cp#bx-|{}_ryTbPXgPP~>I2IoJhG8#S@!aRIf<;p}9Y_mo2
zuxN^yY{O;L_a}L?-hDwKjd{OwhT}HdIV}@6meqTfjP_lI^D7E**QX<>B5O~=Hh&Ud
zFN8KbE6RP%FmuN^Tsim&-&x!Ua%OG#azYMt-0#3M%^RScl5lUcA~zwr_fGeK9I5TK
zV3i5=qKLW_91@<67kl^T2}v_JJ@^douWEy?ry5B+m%_!n`|y~g5>vA=V8$)maArw6
zZa5#u&5Sw&GsA1~wBkVKhzmth$_1!A&Jl`x1fuFoBk318O(v-xg%6f3LQc5>`=1<0
zsf`t&Sndj8Nm?v8Q&KdhDgiC7`tW(<JFva`1Ur%3mnHAugeJFTnafj4O1ZIuC4MTv
zriCN8f>)2RwZ9l1?Udo9K#f<}G=RUfS_Fnl7lE^~gT}+&Ov^0*4RYlmLVh)y)8C2|
z9*xH}g<e#>Bos#u8qbCrDB$4+W47h75Bn(0=ZlY23w5U1U}WkzRHN;X9WsI2TCYya
z<CMwnjZifE$pAL*hyzYKx0c!0M8S1QeSF(CgLRLn7I8ZhN&T^a)kKEy2OXb)O6gv1
zbN@0faN0U5b#Q^9`^CU?vL9Yt;L5(aEhh2Bd6**iD{X%wO<tQ!u-Rr9t6UO8#ZR`v
z`x&p$PrQsvOw-0S-s`xn<%-l<@*K1#YcZkOK@5K>&J<2av0}Fp_^sFmt=11>Mw!EC
zLYg1OqAI(tSpv=qd#I*iE*|>0m8=6EW97+tTx5J-`uIs+^wiv(bxu{Kg4c!I#n*+f
zu1W~+tfT3}nme4Usv#RX(tr-;UxDKjUc(QKsTdgOg~nP|RF`s<tE>v-`aIYV0mIeV
zhz5BUa%*;P{`9!irP!8!SdL<n$%9bsOC82NTSSW&m&3{N$Dlbb9rHzE$myA}@)<vb
z#X~>A-8)&3?LH6;p#Yo1zTo%{IZA)0KpB^IFz4mtX|%>b<~mxJB2VtX)#bUcpj5zg
zYL>vfJv`Q2w`BX;9hi=;3%B8sEsNPW7l-#h4BO3$S*qPGO#OZsc9l)R0Xt>+rXx4-
z^1<;a`NI#d@1KOClkY%W)E}l-Y=mG@n{bZ)be5hRN8OiuN~_j<;DYAKG5^^i=%Lp~
z=ona5k|S_u0j<WIDCHYY<IU;R5d#WKuf&RkAZC|452aJ1=$=y==0ASjt4WCBnAR_B
zo?0e*utbU)&adS15BH_YPG#20orQp<K5%8W7A2oa;yXS2a5|e*=#6|dbIw*EL2Pr0
z^NO9E^{T<NN7{iMeHYC27(N86?@#%$seM55N(+cDNE69CQerj^n>p3=Zc1M3&P8qu
zq^|sY2z#ajA9v4Vx2qy>)U@6m)ZLv5t_-G(X~htHw^yrRHG;&;pX2jEF?4i|2WwE@
zf-+G{DeHk3u8^{&&sImVTk|zMXqbt*0$GrfmO_3?B0QS44mDgh(RjHRXtzTQDh9|=
z-}*;ffteWPpMNHNwCphG>y2U>dMV7eX9hJ13eb3olc>2c1k(Zz^RrU3@Xp0PoG_yX
zR1V2f8m6-NyC$$Vq7<{1#=)V~R$k|XKX|`;Da_EZV7j4hbZAe9=#6MJ#BA6?AM8g#
z{+>YAad@?eTYnL{&YRG>9zV)GrU75heMaY(D$GhSmPK}NhsCcr#;qKJr!Vh=f;r8&
z>PQgYAC$zcJFP`t+UdNsa3RDD=vVqiekVKDZO;zS6?mG*F~h7!+%Sh|KJ-WeJNIP<
zgdGgPdZx>c4v``APE9uB!5vP<W;Y&`&p`2#H@M}%C_0*APse&|FkKIy;O6YLWY|{C
zmHl#w?TpZ8IcGb$%J<cxI{W1`|GpeAGx#mnre6y>hqJNXp%JbmO{VS*JJ_TnIs9g(
zO|5Y&z-3Y;tnwMc-bVb!&zqV^PX?!QRVJ;Vs29uHsw~*rOf7oxn<Ks-UH}CpaqN|f
zF8d``hb<hbO_MTAS?;Y@xH%;g%Mu@A!$_fU$F_3bY~pJuA6Eny*Q?;;y+JTmKaPUe
zHF0U~)1fDFIX?L62*)}V>HLBXp!doSmzmkpsL5x!)CY>};cf-ex%3$hP8|)09#5gK
zvCsL(DZ|<4Q$x5DO|>}m**DDHJcJ7U-Kg=pEsQ-J0OBb_>8Z+0_RjkhjG19ghl<|g
zid);^=51S4yEc@asCth})a)R?w3*vlx<@Eu?1=gjNAbp+M|gjnh_`XN!pmI%CZ0D1
zG9(2glg>azej|2SrlVc<b)5a88Vgo<vFwOhbn8(BO|PGb#Q~@Jv$LylMe<>A8tD$H
zYZQ8OCtu#e&6rA`#!&y(zD#1^AUd2PLpx{o>&*u}(9^Gg3qJM>bgT&m#~Vh%Z;^w!
zHMJYT`gkWg`?d4=^OY&HAOlxkuLq|cyD`b?7H&9a!FuNOV;PCj6qeGD)Xw(S*(*1q
zZ`T#j(A&bL%$!359!AjS++CRBB80o^99be)%#TfBBr2K<Ge*j@aqB1J#pG0$B(8(1
zxSY9+31{lTlcCUeHyV0ruw&yYaYv~I$*&)T>t`y^t)04byUU!#yDwq)(rieOu!*V9
zeuXmMrD503-u-&ZUeG_Ym~1Afv2&sE<QE%-i?-^cqe~{b?AriCAeMQQoP%#O-ePox
zISbFIMu{RuQq@tk#b-U{-yFrMysm~LM#X&3xO`O5UqFL?cyXz6;#^i<CYQNy4;L}3
zKgEtWXDxlpx!=?Wz(Jj-@SvX<d$4R0JAU+6R`z@-(>kZdy3V+<?hPCGZp|1lIy@7T
zu2^Hvo=1FctQuDBSpoff`Ty_=#7}EBledu*#U1r!C91LD^x_2<`Rivc{l{U_Q|JKI
zZ&F~<U`Xn%@3@l<m*LKCaZY#4UVtn{{<=~iUR#46rh!zaH-wzlUO~ItJ6NYtC~LZV
zlFOSI1AQ-AQ@WxItGk@e-FeoRQgg#_<|1c)hW7*}b$=V4m{$kOmbt*bUpDiuKMDaw
zauCAjin2f564}?}g4>TXSecPZgPhM`><0~UQx#_oF}1?T{C?E)KAv4WGMrh~R)9xN
z1o%#0NDu8T$xdfJ9EnvROPjT9+qP=lxJQC*ekM(Cbr!O(hrZx31q~*2*Z_$EIuxn9
zRn#+8r}u8nn3a5K<;A57p}TA*UuP3Tp%+#V=)A|`uXp*wtKYz*Ep{{}Jch>9jX_6Y
zDsC<7Lv&senZaW6Jfq6#l{VtLi_x&gHUR8OFL5uvo&;xYLn3_z_HnE{Ijxc=>pQ9}
z<ohbfXsyS)sy0}-@*OJ9o`l(6W=x`O4%-%^%9KrG$<b7Aw5Fvb-N_$F`+6?IzNwQb
zdt4`{;Nip1iBrK=Qw6y8ivnxe??{j4@1f}pf&7MmYv^aNgM|eBLd6*oIC;iz(4g6h
zr#H`GHs(brF?%<>Tr+_)7_9=<^-~C=XVbZ7>dgC`2S`7-fT_zwT<)H!RDV1bvvoJJ
z7L|Gw1a^+@J;s9hqrPl_`Xp|s)LA&UZ#Ax&nTjTJr<3Q9BY4Ke9b=9>0E1X5JUFrg
zA1u89-*ZdgY)1w^ctkYwwXeo=XZzDr>Gk-=y^B9r_ZGvh8?Xub6JTp!Z_<A!NiAxL
z(AnaS+x0j`joQ?BA_=lrB3HO(Bul@3Mig78%yxE-WI2*8czVqoR#m(hW#9#>ooR(^
zRZ|v}rvT>-jM&_QZQRorMojF!D)V~r4GvsR!VMEo!up)GR26DUI?Au1qQw?6+}rVl
zUL+-59zYenwG4yV7a>{trEuy>1M<JjgZGjQ*mM68K5=}={j5)>=-u5tOhX{}+5fpd
zIYcn<Z|jr))x4_yU4hb4?x%%Y*+J9(@A^|if!^O*81*ml=kmaaMZF5x=mkHuz`lzD
zT%2_N&ftH9r~e+!tZmHA&Fri#EN#r~ZEVe~E$pq#Z0)VhEzPX#>@BRUt<3Fh?aj@8
zvQ%8YZk9o>I`wDwbQk+c_`k~OKk9>f-Sn6Czv-y|iu6Y@u<oC5ta|mqe~0uJb@5*T
z|0wR&`x9WwKLP#^Me$!T|0vcS`X@}4Ubp-m$^VLk{|fcT>bLrzpwj;V>UXX0Uvd5z
zrGLgrR{DEN|HA1NVE_3^{xKGZ{E3g5@!#Y8JHr18^v7^9{1Z^>KLGuGAbYw?OZP7L
zSJRX*`7hFcH2eP<m;5Ur1p>Q&Pw8FvQ*SRgVEX%#-__+(KlRlDncmOO`M=NoKZQ@H
ABme*a

literal 0
HcmV?d00001

diff --git a/test/models/typhon/test_typhon.py b/test/models/typhon/test_typhon.py
index 39b150f8a6..5b119b2f72 100644
--- a/test/models/typhon/test_typhon.py
+++ b/test/models/typhon/test_typhon.py
@@ -174,7 +174,7 @@ def test_typhon_forward_with_local_features(device, pytestconfig):
         n_hidden_local=32,
     ).to(device)
 
-    batch_size = 2
+    batch_size = 1
     n_tokens = 100
     n_global = 5
     n_geom = 235
@@ -187,7 +187,7 @@ def test_typhon_forward_with_local_features(device, pytestconfig):
     outputs = model(local_emb, global_embedding=global_emb, geometry=geometry)
 
     assert isinstance(outputs, torch.Tensor)
-    assert outputs[0].shape == (batch_size, n_tokens, 4)
+    assert outputs.shape == (batch_size, n_tokens, 4)
     assert not torch.isnan(outputs[0]).any()
 
 
@@ -380,18 +380,18 @@ def test_typhon_te_basic(device, pytestconfig):
 
     batch_size = 2
     n_tokens = 100
+    n_geom = 235
     n_global = 5
 
     local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
-    geometry = torch.randn(batch_size, n_tokens, 3).to(device)
+    geometry = torch.randn(batch_size, n_geom, 3).to(device)
     global_emb = torch.randn(batch_size, n_global, 16).to(device)
 
-    assert validate_forward_accuracy(
-        model,
-        (local_emb, global_emb, geometry),
-        file_name="typhon_te_output.pth",
-        atol=1e-3,
-    )
+    outputs = model(local_emb, global_embedding=global_emb, geometry=geometry)
+
+    assert isinstance(outputs, torch.Tensor)
+    assert outputs.shape == (batch_size, n_tokens, 4)
+    assert not torch.isnan(outputs[0]).any()
 
 
 # =============================================================================
@@ -590,64 +590,6 @@ def test_typhon_activations(device, activation):
     assert not torch.isnan(outputs).any()
 
 
-# =============================================================================
-# Gradient Flow Tests
-# =============================================================================
-
-
-@pytest.mark.parametrize("device", ["cuda:0"])
-def test_typhon_gradient_flow(device):
-    """Test that gradients flow properly through Typhon model."""
-    torch.manual_seed(42)
-
-    model = Typhon(
-        functional_dim=32,
-        out_dim=4,
-        geometry_dim=3,
-        global_dim=16,
-        n_layers=2,
-        n_hidden=64,
-        dropout=0.0,
-        n_head=4,
-        act="gelu",
-        mlp_ratio=2,
-        slice_num=8,
-        use_te=False,
-        time_input=False,
-        plus=False,
-        include_local_features=False,
-    ).to(device)
-
-    batch_size = 2
-    n_tokens = 100
-    n_global = 5
-
-    local_emb = torch.randn(batch_size, n_tokens, 32, requires_grad=True).to(device)
-    geometry = torch.randn(batch_size, n_tokens, 3, requires_grad=True).to(device)
-    global_emb = torch.randn(batch_size, n_global, 16, requires_grad=True).to(device)
-
-    outputs = model(local_emb, global_embedding=global_emb, geometry=geometry)
-
-    # Compute loss and backpropagate
-    loss = outputs[0].sum()
-    loss.backward()
-
-    # Check gradients exist
-    assert local_emb.grad is not None
-    assert geometry.grad is not None
-    assert global_emb.grad is not None
-
-    # Check gradients are not all zeros
-    assert not torch.all(local_emb.grad == 0)
-    assert not torch.all(geometry.grad == 0)
-    assert not torch.all(global_emb.grad == 0)
-
-    # Check model parameters have gradients
-    for name, param in model.named_parameters():
-        if param.requires_grad:
-            assert param.grad is not None, f"No gradient for parameter: {name}"
-
-
 # =============================================================================
 # Shape and Configuration Tests
 # =============================================================================
@@ -679,17 +621,18 @@ def test_typhon_different_depths(device, n_layers):
 
     batch_size = 2
     n_tokens = 100
+    n_geom = 235
     n_global = 5
 
     local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
-    geometry = torch.randn(batch_size, n_tokens, 3).to(device)
+    geometry = torch.randn(batch_size, n_geom, 3).to(device)
     global_emb = torch.randn(batch_size, n_global, 16).to(device)
 
     outputs = model(local_emb, global_embedding=global_emb, geometry=geometry)
 
-    assert len(outputs) == 1
-    assert outputs[0].shape == (batch_size, n_tokens, 4)
-    assert not torch.isnan(outputs[0]).any()
+    assert isinstance(outputs, torch.Tensor)
+    assert outputs.shape == (batch_size, n_tokens, 4)
+    assert not torch.isnan(outputs).any()
 
 
 @pytest.mark.parametrize("device", ["cuda:0"])
@@ -718,17 +661,18 @@ def test_typhon_different_slice_nums(device, slice_num):
 
     batch_size = 2
     n_tokens = 100
+    n_geom = 235
     n_global = 5
 
     local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
-    geometry = torch.randn(batch_size, n_tokens, 3).to(device)
+    geometry = torch.randn(batch_size, n_geom, 3).to(device)
     global_emb = torch.randn(batch_size, n_global, 16).to(device)
 
     outputs = model(local_emb, global_embedding=global_emb, geometry=geometry)
 
-    assert len(outputs) == 1
-    assert outputs[0].shape == (batch_size, n_tokens, 4)
-    assert not torch.isnan(outputs[0]).any()
+    assert isinstance(outputs, torch.Tensor)
+    assert outputs.shape == (batch_size, n_tokens, 4)
+    assert not torch.isnan(outputs).any()
 
 
 @pytest.mark.parametrize("device", ["cuda:0"])
@@ -757,16 +701,17 @@ def test_typhon_different_hidden_sizes(device, n_hidden, n_head):
 
     batch_size = 2
     n_tokens = 100
+    n_geom = 235
     n_global = 5
 
     local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
-    geometry = torch.randn(batch_size, n_tokens, 3).to(device)
+    geometry = torch.randn(batch_size, n_geom, 3).to(device)
     global_emb = torch.randn(batch_size, n_global, 16).to(device)
 
     outputs = model(local_emb, global_embedding=global_emb, geometry=geometry)
 
-    assert len(outputs) == 1
-    assert outputs[0].shape == (batch_size, n_tokens, 4)
+    assert isinstance(outputs, torch.Tensor)
+    assert outputs.shape == (batch_size, n_tokens, 4)
     assert not torch.isnan(outputs[0]).any()
 
 

From 393523420cd0ecc75e5b34b47f5932c71bb51be0 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 8 Dec 2025 10:50:59 -0800
Subject: [PATCH 14/32] More robust attributes

---
 physicsnemo/datapipes/cae/cae_dataset.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/physicsnemo/datapipes/cae/cae_dataset.py b/physicsnemo/datapipes/cae/cae_dataset.py
index 5272beefaf..2d17bfb5bc 100644
--- a/physicsnemo/datapipes/cae/cae_dataset.py
+++ b/physicsnemo/datapipes/cae/cae_dataset.py
@@ -652,19 +652,26 @@ def read_file_attributes(
 
             keys = store.list().result()
 
+            def to_tensor_dict(attributes_dict):
+                attributes = {}
+                for k, v in attributes_dict.items():
+                    try:
+                        attributes[k] = torch.tensor(v)
+                    except (TypeError, ValueError):  # noqa PERF203
+                        pass
+                return attributes
+
             # Zarr 3 check:
             if b"/zarr.json" in keys:
                 zarr_json = store.read(b"/zarr.json").result()
                 # load into json's parser:
                 attributes_dict = json.loads(zarr_json.value)["attributes"]
-                attributes = {k: torch.tensor(v) for k, v in attributes_dict.items()}
-                return attributes
+                return to_tensor_dict(attributes_dict)
             elif b"/.zattrs" in keys:
                 # Zarr 2:
                 zarr_attrs = store.read(b"/.zattrs").result()
                 attributes_dict = json.loads(zarr_attrs.value)
-                attributes = {k: torch.tensor(v) for k, v in attributes_dict.items()}
-                return attributes
+                return to_tensor_dict(attributes_dict)
             else:
                 return {}
 

From 22177f8136ccb8aba44ba48e05c3da05522fd1da Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 8 Dec 2025 10:53:28 -0800
Subject: [PATCH 15/32] Add runtime error passing too

---
 physicsnemo/datapipes/cae/cae_dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/physicsnemo/datapipes/cae/cae_dataset.py b/physicsnemo/datapipes/cae/cae_dataset.py
index 2d17bfb5bc..e404c7272a 100644
--- a/physicsnemo/datapipes/cae/cae_dataset.py
+++ b/physicsnemo/datapipes/cae/cae_dataset.py
@@ -657,7 +657,7 @@ def to_tensor_dict(attributes_dict):
                 for k, v in attributes_dict.items():
                     try:
                         attributes[k] = torch.tensor(v)
-                    except (TypeError, ValueError):  # noqa PERF203
+                    except (TypeError, ValueError, RuntimeError):  # noqa PERF203
                         pass
                 return attributes
 

From 1e41e4271ef3f8fa6d8a15d06ceebbf08ad411d8 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 8 Dec 2025 11:35:00 -0800
Subject: [PATCH 16/32] Remove printout

---
 physicsnemo/experimental/models/typhon/typhon.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/physicsnemo/experimental/models/typhon/typhon.py b/physicsnemo/experimental/models/typhon/typhon.py
index a094f817f1..f5641e3d61 100644
--- a/physicsnemo/experimental/models/typhon/typhon.py
+++ b/physicsnemo/experimental/models/typhon/typhon.py
@@ -928,11 +928,7 @@ def forward(
         """
 
         single_input = isinstance(local_embedding, torch.Tensor)
-
-        print(f"type of local_embedding: {type(local_embedding)}")
-        print(f"type of global_embedding: {type(global_embedding)}")
-        print(f"type of geometry: {type(geometry)}")
-
+        
         if time is not None:
             raise NotImplementedError("Time input is not implemented yet."
                                       "Error rather than silently ignoring it.")

From f4202427f82df0d06f19f27a75525186a5b95a49 Mon Sep 17 00:00:00 2001
From: Rishi Ranade <rranade@oci-hsg-cs-001-login-01.cm.cluster>
Date: Wed, 10 Dec 2025 07:48:25 -0800
Subject: [PATCH 17/32] combining typhon/transolver to transformer models and
 cleaning up inference scripts

---
 .../README.md                                 | 159 +---
 .../deprecated/conf/train_surface.yaml        |   0
 .../deprecated/datapipe.py                    |   0
 .../deprecated/inference_on_vtp.py            |   0
 .../requirements.txt                          |   0
 .../src/benchmark_dataloading.py              |   0
 .../src/compute_normalizations.py             |   0
 .../src/conf/datapipe/core.yaml               |   4 +-
 .../src/conf/datapipe/surface.yaml            |   2 -
 .../src/conf/datapipe/volume.yaml             |   2 -
 .../src/conf/model/transolver.yaml            |   0
 .../src/conf/model/typhon.yaml                |   2 +-
 .../src/conf/training/base.yaml               |   0
 .../src/conf/transolver_surface.yaml          |   0
 .../src/conf/transolver_volume.yaml           |   0
 .../src/conf/typhon_surface.yaml              |   4 +-
 .../src/conf/typhon_volume.yaml               |   6 +-
 .../src/inference_on_zarr.py                  | 164 +++-
 .../src/metrics.py                            | 108 +++
 .../src/preprocess.py                         |   0
 .../src/surface_fields_normalization.npz      | Bin
 .../src/train.py                              |   0
 .../src/utils.py                              |   0
 .../src/volume_fields_normalization.npz       | Bin
 .../src/conf/datapipe/combined.yaml           |  35 -
 .../transolver/src/conf/datapipe/core.yaml    |  60 --
 .../transolver/src/conf/datapipe/surface.yaml |  33 -
 .../transolver/src/conf/model/typhon.yaml     |  35 -
 .../transolver/src/inference_on_zarr.py       | 518 ----------
 .../external_aerodynamics/typhon/README.md    | 284 ------
 .../typhon/requirements.txt                   |   9 -
 .../typhon/src/benchmark_dataloading.py       | 166 ----
 .../typhon/src/compute_normalizations.py      | 159 ----
 .../typhon/src/conf/datapipe/combined.yaml    |  35 -
 .../typhon/src/conf/datapipe/volume.yaml      |  31 -
 .../typhon/src/conf/model/transolver.yaml     |  34 -
 .../typhon/src/conf/training/base.yaml        |  32 -
 .../typhon/src/metrics.py                     | 164 ----
 .../typhon/src/preprocess.py                  | 121 ---
 .../src/surface_fields_normalization.npz      | Bin 1040 -> 0 bytes
 .../external_aerodynamics/typhon/src/train.py | 900 ------------------
 .../external_aerodynamics/typhon/src/utils.py | 102 --
 .../src/volume_fields_normalization.npz       | Bin 1056 -> 0 bytes
 .../datapipes/cae/transolver_datapipe.py      |  85 +-
 44 files changed, 328 insertions(+), 2926 deletions(-)
 rename examples/cfd/external_aerodynamics/{transolver => transformer_models}/README.md (66%)
 rename examples/cfd/external_aerodynamics/{transolver => transformer_models}/deprecated/conf/train_surface.yaml (100%)
 rename examples/cfd/external_aerodynamics/{transolver => transformer_models}/deprecated/datapipe.py (100%)
 rename examples/cfd/external_aerodynamics/{transolver => transformer_models}/deprecated/inference_on_vtp.py (100%)
 rename examples/cfd/external_aerodynamics/{transolver => transformer_models}/requirements.txt (100%)
 rename examples/cfd/external_aerodynamics/{transolver => transformer_models}/src/benchmark_dataloading.py (100%)
 rename examples/cfd/external_aerodynamics/{transolver => transformer_models}/src/compute_normalizations.py (100%)
 rename examples/cfd/external_aerodynamics/{typhon => transformer_models}/src/conf/datapipe/core.yaml (96%)
 rename examples/cfd/external_aerodynamics/{typhon => transformer_models}/src/conf/datapipe/surface.yaml (96%)
 rename examples/cfd/external_aerodynamics/{transolver => transformer_models}/src/conf/datapipe/volume.yaml (95%)
 rename examples/cfd/external_aerodynamics/{transolver => transformer_models}/src/conf/model/transolver.yaml (100%)
 rename examples/cfd/external_aerodynamics/{typhon => transformer_models}/src/conf/model/typhon.yaml (98%)
 rename examples/cfd/external_aerodynamics/{transolver => transformer_models}/src/conf/training/base.yaml (100%)
 rename examples/cfd/external_aerodynamics/{transolver => transformer_models}/src/conf/transolver_surface.yaml (100%)
 rename examples/cfd/external_aerodynamics/{transolver => transformer_models}/src/conf/transolver_volume.yaml (100%)
 rename examples/cfd/external_aerodynamics/{typhon => transformer_models}/src/conf/typhon_surface.yaml (89%)
 rename examples/cfd/external_aerodynamics/{typhon => transformer_models}/src/conf/typhon_volume.yaml (88%)
 rename examples/cfd/external_aerodynamics/{typhon => transformer_models}/src/inference_on_zarr.py (74%)
 rename examples/cfd/external_aerodynamics/{transolver => transformer_models}/src/metrics.py (57%)
 rename examples/cfd/external_aerodynamics/{transolver => transformer_models}/src/preprocess.py (100%)
 rename examples/cfd/external_aerodynamics/{transolver => transformer_models}/src/surface_fields_normalization.npz (100%)
 rename examples/cfd/external_aerodynamics/{transolver => transformer_models}/src/train.py (100%)
 rename examples/cfd/external_aerodynamics/{transolver => transformer_models}/src/utils.py (100%)
 rename examples/cfd/external_aerodynamics/{transolver => transformer_models}/src/volume_fields_normalization.npz (100%)
 delete mode 100644 examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/combined.yaml
 delete mode 100644 examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/core.yaml
 delete mode 100644 examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/surface.yaml
 delete mode 100644 examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml
 delete mode 100644 examples/cfd/external_aerodynamics/transolver/src/inference_on_zarr.py
 delete mode 100644 examples/cfd/external_aerodynamics/typhon/README.md
 delete mode 100644 examples/cfd/external_aerodynamics/typhon/requirements.txt
 delete mode 100644 examples/cfd/external_aerodynamics/typhon/src/benchmark_dataloading.py
 delete mode 100644 examples/cfd/external_aerodynamics/typhon/src/compute_normalizations.py
 delete mode 100644 examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/combined.yaml
 delete mode 100644 examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/volume.yaml
 delete mode 100644 examples/cfd/external_aerodynamics/typhon/src/conf/model/transolver.yaml
 delete mode 100644 examples/cfd/external_aerodynamics/typhon/src/conf/training/base.yaml
 delete mode 100644 examples/cfd/external_aerodynamics/typhon/src/metrics.py
 delete mode 100644 examples/cfd/external_aerodynamics/typhon/src/preprocess.py
 delete mode 100644 examples/cfd/external_aerodynamics/typhon/src/surface_fields_normalization.npz
 delete mode 100644 examples/cfd/external_aerodynamics/typhon/src/train.py
 delete mode 100644 examples/cfd/external_aerodynamics/typhon/src/utils.py
 delete mode 100644 examples/cfd/external_aerodynamics/typhon/src/volume_fields_normalization.npz

diff --git a/examples/cfd/external_aerodynamics/transolver/README.md b/examples/cfd/external_aerodynamics/transformer_models/README.md
similarity index 66%
rename from examples/cfd/external_aerodynamics/transolver/README.md
rename to examples/cfd/external_aerodynamics/transformer_models/README.md
index be8943b89b..d6750ac47f 100644
--- a/examples/cfd/external_aerodynamics/transolver/README.md
+++ b/examples/cfd/external_aerodynamics/transformer_models/README.md
@@ -1,134 +1,74 @@
 <!-- markdownlint-disable -->
-# `Transolver` for External Aerodynamics on Irregular Meshes
+# Transformer Models for External Aerodynamics on Irregular Meshes
 
-This example is an end to end training recipe for the `Transolver` model, which can
-be run on surface or volume data.
+This directory contains training and inference recipes for transformer-based surrogate models for CFD applications. This is a collection of transformer models including `Transolver` and `Typhon`, both of which can be run on surface or volume data.
 
-`Transolver` is a high-performance surrogate model for CFD solvers. The Transolver model
-adapts the Attention mechanism, encouraging the learning of meaningful representations.
-In each PhysicsAttention layer, input points are projected onto state vectors through
-learnable transformations and weights. These transformations are then used to compute
-self-attention among all state vectors, and the same weights are reused to project
-states back to each input point.
+## Models Overview
 
-## External Aerodynamics CFD Example: Overview
+### Transolver
+
+`Transolver` is a high-performance surrogate model for CFD solvers. The Transolver model adapts the Attention mechanism, encouraging the learning of meaningful representations. In each PhysicsAttention layer, input points are projected onto state vectors through learnable transformations and weights. These transformations are then used to compute self-attention among all state vectors, and the same weights are reused to project states back to each input point.
+
+By stacking multiple PhysicsAttention layers, the `Transolver` model learns to map from the functional input space to the output space with high fidelity. The PhysicsNeMo implementation closely follows the original Transolver architecture ([https://github.com/thuml/Transolver](https://github.com/thuml/Transolver)), but introduces modifications for improved numerical stability and compatibility with NVIDIA TransformerEngine.
 
-This directory contains the essential components for training and evaluating a
-model tailored to external aerodynamics CFD problems built on `Transolver`.
+### Typhon
 
-By stacking multiple PhysicsAttention layers, the `Transolver` model learns to map from
-the functional input space to the output space with high fidelity. The PhysicsNeMo
-implementation closely follows the original Transolver architecture
-([https://github.com/thuml/Transolver](https://github.com/thuml/Transolver)), but
-introduces modifications for improved numerical stability and compatibility with NVIDIA
-TransformerEngine.
+Typhon adapts the Transolver backbone by replacing standard attention with GALE (Geometry-Aware Latent Embeddings) attention, which unifies physics-aware self-attention on learned state slices with cross-attention to geometry and global context embeddings. Inspired by Domino's multi-scale ball query formulations, Typhon learns global geometry encodings and local latent encodings that capture neighborhoods at multiple radii, preserving fine-grained near-boundary behavior and far-field interactions. Crucially, geometry and global features are projected into physical state spaces and injected as context in every transformer block, ensuring persistent conditioning and alignment between evolving latent states and the underlying domain.
+
+GALE directly targets core challenges in AI physics modeling. By structuring self-attention around physics-aware slices, Typhon encourages interactions that reflect operator couplings (e.g., pressure–velocity or field–material). Multi-scale ball queries enforce locality where needed while maintaining access to global signals, balancing efficiency with nonlocal reasoning. Continuous geometry-context projection at depth mitigates representation drift and improves stability, while providing a natural interface for constraint-aware training and regularization. Together, these design choices enhance accuracy, robustness to geometric and regime shifts, and scalability on large, irregular discretizations.
+
+## External Aerodynamics CFD Example: Overview
 
-The training example for Transolver uses the [DrivaerML dataset](https://caemldatasets.org/drivaerml/).
+This directory contains the essential components for training and evaluating models tailored to external aerodynamics CFD problems. The training examples use the [DrivaerML dataset](https://caemldatasets.org/drivaerml/).
 
-As a concrete example, we are training external aerodynamics surrogate models for automobiles.
-`Transolver` takes as input a point cloud on the surface or surrounding the surface,
-iteratively processing it with PhysicsAttention to produce high-fidelity predictions.
+As a concrete example, we are training external aerodynamics surrogate models for automobiles. These models take as input a point cloud on the surface or surrounding the surface, iteratively processing it with transformer-based attention mechanisms to produce high-fidelity predictions.
 
 ## Requirements
 
-Transolver can use TransformerEngine from NVIDIA, as well as tensorstore (for IO),
-zarr, einops and a few other python packages.  Install them with `pip install -r requirements.txt`
-as well as physicsnemo 25.11 or higher.
+These transformer models can use TransformerEngine from NVIDIA, as well as tensorstore (for IO), zarr, einops and a few other python packages. Install them with `pip install -r requirements.txt` as well as physicsnemo 25.11 or higher.
 
-## Using Transolver for External Aerodynamics
+## Using Transformer Models for External Aerodynamics
 
-1. Prepare the Dataset.  Transolver uses the same Zarr outputs as other models with DrivaerML.
-`PhysicsNeMo` has a related project to help with data processing, called [PhysicsNeMo-Curator](https://github.com/NVIDIA/physicsnemo-curator).
-Using `PhysicsNeMo-Curator`, the data needed to train can be setup easily.
-Please refer to [these instructions on getting started](https://github.com/NVIDIA/physicsnemo-curator?tab=readme-ov-file#what-is-physicsnemo-curator)
-with `PhysicsNeMo-Curator`.  For specifics of preparing the dataset for this example,
-see the [download](https://github.com/NVIDIA/physicsnemo-curator/blob/main/examples/external_aerodynamics/README.md#download-drivaerml-dataset)
-and [preprocessing](https://github.com/NVIDIA/physicsnemo-curator/blob/main/examples/external_aerodynamics/README.md)
-instructions from `physicsnemo-curator`.  Users should apply the
-preprocessing steps locally to produce `zarr` output files.
+1. Prepare the Dataset. These models use the same Zarr outputs as other models with DrivaerML. `PhysicsNeMo` has a related project to help with data processing, called [PhysicsNeMo-Curator](https://github.com/NVIDIA/physicsnemo-curator). Using `PhysicsNeMo-Curator`, the data needed to train can be set up easily. Please refer to [these instructions on getting started](https://github.com/NVIDIA/physicsnemo-curator?tab=readme-ov-file#what-is-physicsnemo-curator) with `PhysicsNeMo-Curator`. For specifics of preparing the dataset for this example, see the [download](https://github.com/NVIDIA/physicsnemo-curator/blob/main/examples/external_aerodynamics/README.md#download-drivaerml-dataset) and [preprocessing](https://github.com/NVIDIA/physicsnemo-curator/blob/main/examples/external_aerodynamics/README.md) instructions from `physicsnemo-curator`. Users should apply the preprocessing steps locally to produce `zarr` output files.
 
-2. Train your model.  The model and training configuration is configured with
-`hydra`, and two configurations are available: `transolver_surface` and `transolver_volume`.
-Find configurations in `src/conf`, where you can control both network properties
-and training properties. See below for an overview and explanation of key
-parameters that may be of special interest.
+2. Train your model. The model and training settings are configured with `hydra`, and configurations are available for both surface and volume modes (e.g., `transolver_surface`, `transolver_volume`, `typhon_surface`, `typhon_volume`). Find configurations in `src/conf`, where you can control both network properties and training properties. See below for an overview and explanation of key parameters that may be of special interest.
 
-3. Use the trained model to perform inference.  This example contains two
-inference examples: one for inference on the validation set, already in
-Zarr format.  The `.vtp` inference pipeline is being updated to accommodate Transolver.
+3. Use the trained model to perform inference. This example contains inference examples for the validation set, already in Zarr format. The `.vtp` inference pipeline is being updated to accommodate these models.
 
-The following sections contain further details on the training and inference
-recipe.
+The following sections contain further details on the training and inference recipe.
 
 ## Model Training
 
-To train the model, first we compute normalization factors on the dataset to
-make the predictive quantities output in a well-defined range. The included
-script, `compute_normalizations.py`, will compute the normalization
-factors.  Once run, it should save to an output file similar to
-"surface_fields_normalization.npz".  This will get loaded during training.
-The normalization file location can be configured via `data.normalization_dir`
-in the training configuration (defaults to current directory).
-
-> By default, the normalization sets the mean to 0.0 and std to 1.0 of all labels
-> in the dataset, computing the mean across the train dataset.  You could adapt
-> this to a different normalization, however take care to update both the
-> preprocessing as well as inference scripts.  Min/Max is another popular strategy.
-
-To configure your training run, use `hydra`.  The
-config contains sections for the model, data, optimizer, and training settings.
-For details on the model parameters, see the API for `physicsnemo.models.transolver`.
-
-To fit the training into memory, you can apply on-the-fly downsampling to the data
-with `data.resolution=N`, where `N` is how many points per GPU to use.  This dataloader
-will yield the full data examples in shapes of `[1, K, f]` where `K` is the resolution
-of the mesh, and `f` is the feature space (3 for points, normals, etc.  4 for surface
-fields).  Downsampling happens in the preprocessing pipeline.
-
-During training, the configuration uses a flat learning rate that decays every 100
-epochs, and bfloat16 format by default.  The scheduler and learning rate
-may be configured.
-
-The Optimizer for this training is the `Muon` optimizer - available only in
-`pytorch>=2.9.0`.  While not strictly required, we have found the `muon` optimizer
-performs substantially better on these architectures than standard `AdamW` and
-a oneCycle schedule.
+To train the model, first we compute normalization factors on the dataset to make the predictive quantities output in a well-defined range. The included script, `compute_normalizations.py`, will compute the normalization factors. Once run, it should save to an output file similar to "surface_fields_normalization.npz". This will get loaded during training. The normalization file location can be configured via `data.normalization_dir` in the training configuration (defaults to current directory).
+
+> By default, the normalization sets the mean to 0.0 and std to 1.0 of all labels in the dataset, computing the mean across the train dataset. You could adapt this to a different normalization; however, take care to update both the preprocessing and inference scripts. Min/Max is another popular strategy.
+
+To configure your training run, use `hydra`. The config contains sections for the model, data, optimizer, and training settings. For details on the model parameters, see the API for `physicsnemo.models.transolver` and `physicsnemo.experimental.models.typhon`.
+
+To fit the training into memory, you can apply on-the-fly downsampling to the data with `data.resolution=N`, where `N` is how many points per GPU to use. This dataloader will yield the full data examples in shapes of `[1, K, f]` where `K` is the resolution of the mesh, and `f` is the feature space (3 for points, normals, etc. 4 for surface fields). Downsampling happens in the preprocessing pipeline.
+
+During training, the configuration uses a flat learning rate that decays every 100 epochs, and bfloat16 format by default. The scheduler and learning rate may be configured.
+
+The Optimizer for this training is the `Muon` optimizer - available only in `pytorch>=2.9.0`. While not strictly required, we have found the `muon` optimizer performs substantially better on these architectures than standard `AdamW` and a oneCycle schedule.
 
 ### Training Precision
 
-Transolver, as a transformer-like architecture, has support for NVIDIA's
-[TransformerEngine](https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/index.html)
-built in.  You can enable/disable the transformer engine path in the model with
-`model.use_te=[True | False]`.  Available precisions for training with `transformer_engine`
-are `training.precision=["float32" | "float16" | "bfloat16" | "float8" ]`.  In `float8`
-precision, the TransformerEngine Hybrid recipe is used for casting weights and inputs
-in the forward and backwards passes.  For more details on `float8` precision, see
-the fp8 guide from
-[TransformerEngine](https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/examples/fp8_primer.html).
-When using fp8, the training script will automatically pad and unpad the input and output,
-respectively, to use the fp8 hardware correctly.
-
-> **Float8** precisions are only available on GPUs with fp8 tensorcore support, such
-> as Hopper, Blackwell, Ada Lovelace, and others.
+These transformer architectures have support for NVIDIA's [TransformerEngine](https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/index.html) built in. You can enable/disable the transformer engine path in the model with `model.use_te=[True | False]`. Available precisions for training with `transformer_engine` are `training.precision=["float32" | "float16" | "bfloat16" | "float8" ]`. In `float8` precision, the TransformerEngine Hybrid recipe is used for casting weights and inputs in the forward and backward passes. For more details on `float8` precision, see the fp8 guide from [TransformerEngine](https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/examples/fp8_primer.html). When using fp8, the training script will automatically pad and unpad the input and output, respectively, to use the fp8 hardware correctly.
+
+> **Float8** precisions are only available on GPUs with fp8 tensorcore support, such as Hopper, Blackwell, Ada Lovelace, and others.
 
 ### Other Configuration Settings
 
 Several other important configuration settings are available:
 
-- `checkpoint_dir` sets the directory for saving model checkpoints (defaults to `output_dir`
-if not specified), allowing separation of checkpoints from other outputs.
-- `compile` will use `torch.compile` for optimized performance.  It is not
-compatible with `transformer_engine` (`model.use_te=True`).  If TransformerEngine is
-not used, and half precision is, `torch.compile` is recommended for improved performance.
+- `checkpoint_dir` sets the directory for saving model checkpoints (defaults to `output_dir` if not specified), allowing separation of checkpoints from other outputs.
+- `compile` will use `torch.compile` for optimized performance. It is not compatible with `transformer_engine` (`model.use_te=True`). If TransformerEngine is not used, and half precision is, `torch.compile` is recommended for improved performance.
 - `training.num_epochs` controls the total number of epochs used during training.
-- `training.save_interval` will dictate how often the model weights and training
-tools are checkpointed.
+- `training.save_interval` will dictate how often the model weights and training tools are checkpointed.
 
-> **Note** Like other parameters of the model, changing the value of `model.use_te`
-> will make checkpoints incompatible.
+> **Note** Like other parameters of the model, changing the value of `model.use_te` will make checkpoints incompatible.
 
-The training script supports data-parallel training via PyTorch DDP.  In a future
-update, we may enable domain parallelism via FSDP and ShardTensor.
+The training script supports data-parallel training via PyTorch DDP. In a future update, we may enable domain parallelism via FSDP and ShardTensor.
 
 The script can be launched on a single GPU with, for example,
 
@@ -185,9 +125,7 @@ Epoch 47 Validation Average Metrics:
 
 ## Dataset Inference
 
-The validation dataset in Zarr format can be loaded, processed, and the L2
-metrics summarized in `inference_on_zarr.py`.  For surface data, this script will also
-compute the drag and lift coefficients and the R^2 correlation of the predictions.
+The validation dataset in Zarr format can be loaded, processed, and the L2 metrics summarized in `inference_on_zarr.py`. For surface data, this script will also compute the drag and lift coefficients and the R^2 correlation of the predictions.
 
 To run inference on surface data, it's necessary to add a line to your launch command:
 
@@ -196,15 +134,10 @@ python src/inference_on_zarr.py --config-name transolver_surface run_id=/path/to
 
 ```
 
-The `data.return_mesh_features` flag can also be set in the config file.  It is
-disabled for training but necessary for inference.  The model path should be
-the folder containing your saved checkpoints.
+The `data.return_mesh_features` flag can also be set in the config file. It is disabled for training but necessary for inference. The model path should be the folder containing your saved checkpoints.
 
 
-To ensure correct calculation of drag and lift, and accurate overall metrics,
-the inference script will chunk a full-resolution training example into batches,
-and stitch the outputs together at the end.  Output will appear as a table
-with all metrics for that mode, for example:
+To ensure correct calculation of drag and lift, and accurate overall metrics, the inference script will chunk a full-resolution training example into batches, and stitch the outputs together at the end. Output will appear as a table with all metrics for that mode, for example:
 
 ```
 |   Batch |   Loss |   L2 Pressure |   L2 Shear X |   L2 Shear Y |   L2 Shear Z |   Predicted Drag Coefficient |   Pred Lift Coefficient |   True Drag Coefficient |   True Lift Coefficient |   Elapsed (s) |
@@ -279,6 +212,4 @@ entire mesh.  The outputs are then saved to .vtp files for downstream analysis.
 
 ## Transolver++
 
-Transolver++ is supported with the `plus` flag to the model.  In
-our experiments, we did not see gains, but you are welcome to try it and share
-your results with us on GitHub!
+Transolver++ is supported with the `plus` flag to the model. In our experiments, we did not see gains, but you are welcome to try it and share your results with us on GitHub!
diff --git a/examples/cfd/external_aerodynamics/transolver/deprecated/conf/train_surface.yaml b/examples/cfd/external_aerodynamics/transformer_models/deprecated/conf/train_surface.yaml
similarity index 100%
rename from examples/cfd/external_aerodynamics/transolver/deprecated/conf/train_surface.yaml
rename to examples/cfd/external_aerodynamics/transformer_models/deprecated/conf/train_surface.yaml
diff --git a/examples/cfd/external_aerodynamics/transolver/deprecated/datapipe.py b/examples/cfd/external_aerodynamics/transformer_models/deprecated/datapipe.py
similarity index 100%
rename from examples/cfd/external_aerodynamics/transolver/deprecated/datapipe.py
rename to examples/cfd/external_aerodynamics/transformer_models/deprecated/datapipe.py
diff --git a/examples/cfd/external_aerodynamics/transolver/deprecated/inference_on_vtp.py b/examples/cfd/external_aerodynamics/transformer_models/deprecated/inference_on_vtp.py
similarity index 100%
rename from examples/cfd/external_aerodynamics/transolver/deprecated/inference_on_vtp.py
rename to examples/cfd/external_aerodynamics/transformer_models/deprecated/inference_on_vtp.py
diff --git a/examples/cfd/external_aerodynamics/transolver/requirements.txt b/examples/cfd/external_aerodynamics/transformer_models/requirements.txt
similarity index 100%
rename from examples/cfd/external_aerodynamics/transolver/requirements.txt
rename to examples/cfd/external_aerodynamics/transformer_models/requirements.txt
diff --git a/examples/cfd/external_aerodynamics/transolver/src/benchmark_dataloading.py b/examples/cfd/external_aerodynamics/transformer_models/src/benchmark_dataloading.py
similarity index 100%
rename from examples/cfd/external_aerodynamics/transolver/src/benchmark_dataloading.py
rename to examples/cfd/external_aerodynamics/transformer_models/src/benchmark_dataloading.py
diff --git a/examples/cfd/external_aerodynamics/transolver/src/compute_normalizations.py b/examples/cfd/external_aerodynamics/transformer_models/src/compute_normalizations.py
similarity index 100%
rename from examples/cfd/external_aerodynamics/transolver/src/compute_normalizations.py
rename to examples/cfd/external_aerodynamics/transformer_models/src/compute_normalizations.py
diff --git a/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/core.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/datapipe/core.yaml
similarity index 96%
rename from examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/core.yaml
rename to examples/cfd/external_aerodynamics/transformer_models/src/conf/datapipe/core.yaml
index 67adcb45f6..8521bce1a5 100644
--- a/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/core.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/datapipe/core.yaml
@@ -16,9 +16,9 @@
 
 # Paths to your data:
 train:
-  data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/train/
+  data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/shift_wing/typhon/train/
 val:
-  data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/val/
+  data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/shift_wing/typhon/val/
 
 # You can set a normalization factor directory:
 normalization_dir: "src/"
diff --git a/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/surface.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/datapipe/surface.yaml
similarity index 96%
rename from examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/surface.yaml
rename to examples/cfd/external_aerodynamics/transformer_models/src/conf/datapipe/surface.yaml
index 8363a7a9dc..0f39ba5b10 100644
--- a/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/surface.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/datapipe/surface.yaml
@@ -26,8 +26,6 @@ data_keys:
   - "surface_mesh_centers"
   - "surface_normals"
   - "surface_areas"
-  - "air_density"
-  - "stream_velocity"
   - "stl_faces"
   - "stl_centers"
   - "stl_coordinates"
\ No newline at end of file
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/volume.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/datapipe/volume.yaml
similarity index 95%
rename from examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/volume.yaml
rename to examples/cfd/external_aerodynamics/transformer_models/src/conf/datapipe/volume.yaml
index b222fda5f9..4b7fb3445d 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/volume.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/datapipe/volume.yaml
@@ -24,8 +24,6 @@ mode: volume
 data_keys:
   - "volume_fields"
   - "volume_mesh_centers"
-  - "air_density"
-  - "stream_velocity"
   - "stl_faces"
   - "stl_centers"
   - "stl_coordinates"
\ No newline at end of file
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/model/transolver.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/model/transolver.yaml
similarity index 100%
rename from examples/cfd/external_aerodynamics/transolver/src/conf/model/transolver.yaml
rename to examples/cfd/external_aerodynamics/transformer_models/src/conf/model/transolver.yaml
diff --git a/examples/cfd/external_aerodynamics/typhon/src/conf/model/typhon.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/model/typhon.yaml
similarity index 98%
rename from examples/cfd/external_aerodynamics/typhon/src/conf/model/typhon.yaml
rename to examples/cfd/external_aerodynamics/transformer_models/src/conf/model/typhon.yaml
index 2745a00ab6..6b1e28092c 100644
--- a/examples/cfd/external_aerodynamics/typhon/src/conf/model/typhon.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/model/typhon.yaml
@@ -19,7 +19,7 @@ functional_dim: 6
 global_dim: 2
 geometry_dim: 3
 out_dim: 4
-n_layers: 14
+n_layers: 20
 n_hidden: 256
 dropout: 0.0
 n_head: 8
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/training/base.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/training/base.yaml
similarity index 100%
rename from examples/cfd/external_aerodynamics/transolver/src/conf/training/base.yaml
rename to examples/cfd/external_aerodynamics/transformer_models/src/conf/training/base.yaml
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/transolver_surface.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_surface.yaml
similarity index 100%
rename from examples/cfd/external_aerodynamics/transolver/src/conf/transolver_surface.yaml
rename to examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_surface.yaml
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/transolver_volume.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_volume.yaml
similarity index 100%
rename from examples/cfd/external_aerodynamics/transolver/src/conf/transolver_volume.yaml
rename to examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_volume.yaml
diff --git a/examples/cfd/external_aerodynamics/typhon/src/conf/typhon_surface.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_surface.yaml
similarity index 89%
rename from examples/cfd/external_aerodynamics/typhon/src/conf/typhon_surface.yaml
rename to examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_surface.yaml
index d76899bd61..123d05f639 100644
--- a/examples/cfd/external_aerodynamics/typhon/src/conf/typhon_surface.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_surface.yaml
@@ -31,8 +31,8 @@ profile: false
 model:
   functional_dim: 6
   include_local_features: true # use local features
-  radii: [0.05, 0.25, 1.0, 2.5] # radius for local features
-  neighbors_in_radius: [16, 64, 128, 256] # neighbors in radius for local features
+  radii: [0.01, 0.05, 0.25, 1.0, 2.5, 5.0] # radius for local features
+  neighbors_in_radius: [4, 8, 16, 64, 128, 256]  # neighbors in radius for local features
   n_hidden_local: 32 # hidden dimension for local features
 
 datapipe:
diff --git a/examples/cfd/external_aerodynamics/typhon/src/conf/typhon_volume.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_volume.yaml
similarity index 88%
rename from examples/cfd/external_aerodynamics/typhon/src/conf/typhon_volume.yaml
rename to examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_volume.yaml
index aa1a16f983..1e48580b6f 100644
--- a/examples/cfd/external_aerodynamics/typhon/src/conf/typhon_volume.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_volume.yaml
@@ -36,10 +36,10 @@ datapipe:
 
 model:
   functional_dim: 7
-  out_dim: 5
+  out_dim: 4
   include_local_features: true # use local features
-  radii: [0.05, 0.25, 1.0, 2.5] # radius for local features
-  neighbors_in_radius: [8, 32, 64, 128] # neighbors in radius for local features
+  radii: [0.01, 0.05, 0.25, 1.0, 2.5, 5.0] # radius for local features
+  neighbors_in_radius: [4, 8, 16, 64, 128, 256]  # neighbors in radius for local features
   n_hidden_local: 32 # hidden dimension for local features
 
 # Logging configuration
diff --git a/examples/cfd/external_aerodynamics/typhon/src/inference_on_zarr.py b/examples/cfd/external_aerodynamics/transformer_models/src/inference_on_zarr.py
similarity index 74%
rename from examples/cfd/external_aerodynamics/typhon/src/inference_on_zarr.py
rename to examples/cfd/external_aerodynamics/transformer_models/src/inference_on_zarr.py
index 8b1f1f5e3d..e363c14758 100644
--- a/examples/cfd/external_aerodynamics/typhon/src/inference_on_zarr.py
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/inference_on_zarr.py
@@ -19,18 +19,20 @@
 import numpy as np
 import torch
 import torchinfo
-import typing
+import typing, csv
 import collections
 from typing import Literal
+from datetime import datetime
 
 import hydra
 import omegaconf
 from omegaconf import DictConfig
 from physicsnemo.models.transolver.transolver import Transolver
-from physicsnemo.launch.utils import load_checkpoint
-from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
+from physicsnemo.utils import load_checkpoint
+from physicsnemo.utils.logging import PythonLogger, RankZeroLoggingWrapper
 
 from sklearn.metrics import r2_score
+from metrics_new import metrics_fn_surface, metrics_fn_volume
 
 from physicsnemo.distributed import DistributedManager
 
@@ -199,7 +201,6 @@ def batched_inference_loop(
     metrics = {k: v / global_weight for k, v in metrics.items()}
     loss = loss / global_weight
 
-    # import pdb; pdb.set_trace()
     global_predictions = torch.cat([l[0][0] for l in global_preds_targets], dim=1)
     global_targets = torch.cat([l[1][0] for l in global_preds_targets], dim=1)
 
@@ -322,9 +323,17 @@ def inference(cfg: DictConfig) -> None:
         logger.info(f"Finished batch {batch_idx} in {elapsed:.4f} seconds")
         start = time.time()
 
+        air_density = batch["air_density"] if "air_density" in batch.keys() else None
+        stream_velocity = batch["stream_velocity"] if "stream_velocity" in batch.keys() else None
+
         if cfg.datapipe.mode == "surface":
             coeff = 1.0
 
+            if stream_velocity is not None:
+                global_predictions = global_predictions * stream_velocity**2.0 * air_density
+                global_targets = global_targets * stream_velocity**2.0 * air_density
+
+            metrics = metrics_fn_surface(global_predictions, global_targets, dist_manager)
             # Compute the drag and loss coefficients:
             # (Index on [0] is to remove the 1 batch index)
             pred_pressure, pred_shear = torch.split(
@@ -350,8 +359,7 @@ def inference(cfg: DictConfig) -> None:
                 torch.tensor([[0, 0, 1]], device=dist_manager.device),
             )
 
-            # air_density = batch["air_density"] if "air_density" in batch.keys() else None
-            # stream_velocity = batch["stream_velocity"] if "stream_velocity" in batch.keys() else None
+            
             # true_fields = val_dataset.unscale_model_targets(batch["fields"], air_density=air_density, stream_velocity=stream_velocity)
             true_pressure, true_shear = torch.split(global_targets[0], (1, 3), dim=-1)
 
@@ -383,20 +391,30 @@ def inference(cfg: DictConfig) -> None:
                 if hasattr(metrics["l2_pressure_surf"], "item")
                 else metrics["l2_pressure_surf"]
             )
-            l2_shear_x = (
-                metrics["l2_shear_x"].item()
-                if hasattr(metrics["l2_shear_x"], "item")
-                else metrics["l2_shear_x"]
+            l1_pressure = (
+                metrics["l1_pressure_surf"].item()
+                if hasattr(metrics["l1_pressure_surf"], "item")
+                else metrics["l1_pressure_surf"]
+            )
+            mae_pressure = (
+                metrics["mae_pressure_surf"].item()
+                if hasattr(metrics["mae_pressure_surf"], "item")
+                else metrics["mae_pressure_surf"]
+            )
+            l2_wall_shear_stress = (
+                metrics["l2_wall_shear_stress"].item()
+                if hasattr(metrics["l2_wall_shear_stress"], "item")
+                else metrics["l2_wall_shear_stress"]
             )
-            l2_shear_y = (
-                metrics["l2_shear_y"].item()
-                if hasattr(metrics["l2_shear_y"], "item")
-                else metrics["l2_shear_y"]
+            l1_wall_shear_stress = (
+                metrics["l1_wall_shear_stress"].item()
+                if hasattr(metrics["l1_wall_shear_stress"], "item")
+                else metrics["l1_wall_shear_stress"]
             )
-            l2_shear_z = (
-                metrics["l2_shear_z"].item()
-                if hasattr(metrics["l2_shear_z"], "item")
-                else metrics["l2_shear_z"]
+            mae_wall_shear_stress = (
+                metrics["mae_wall_shear_stress"].item()
+                if hasattr(metrics["mae_wall_shear_stress"], "item")
+                else metrics["mae_wall_shear_stress"]
             )
 
             results.append(
@@ -404,9 +422,11 @@ def inference(cfg: DictConfig) -> None:
                     batch_idx,
                     f"{loss:.4f}",
                     f"{l2_pressure:.4f}",
-                    f"{l2_shear_x:.4f}",
-                    f"{l2_shear_y:.4f}",
-                    f"{l2_shear_z:.4f}",
+                    f"{l1_pressure:.4f}",
+                    f"{mae_pressure:.4f}",
+                    f"{l2_wall_shear_stress:.4f}",
+                    f"{l1_wall_shear_stress:.4f}",
+                    f"{mae_wall_shear_stress:.4f}",
                     f"{pred_drag_coeff:.4f}",
                     f"{pred_lift_coeff:.4f}",
                     f"{true_drag_coeff:.4f}",
@@ -416,51 +436,85 @@ def inference(cfg: DictConfig) -> None:
             )
 
         elif cfg.datapipe.mode == "volume":
+            if stream_velocity is not None:
+                global_predictions[:, :, 3] = global_predictions[:, :, 3] * stream_velocity**2.0 * air_density
+                global_targets[:, :, 3] = global_targets[:, :, 3] * stream_velocity**2.0 * air_density
+                global_predictions[:, :, 0:3] = global_predictions[:, :, 0:3] * stream_velocity
+                global_targets[:, :, 0:3] = global_targets[:, :, 0:3] * stream_velocity
+                global_predictions[:, :, 4] = global_predictions[:, :, 4] * stream_velocity**2.0 * air_density
+                global_targets[:, :, 4] = global_targets[:, :, 4] * stream_velocity**2.0 * air_density
+            
+            metrics = metrics_fn_volume(global_predictions, global_targets, dist_manager)
             # Extract metric values and convert tensors to floats
             l2_pressure = (
                 metrics["l2_pressure_vol"].item()
                 if hasattr(metrics["l2_pressure_vol"], "item")
                 else metrics["l2_pressure_vol"]
             )
-            l2_velocity_x = (
-                metrics["l2_velocity_x"].item()
-                if hasattr(metrics["l2_velocity_x"], "item")
-                else metrics["l2_velocity_x"]
+            l1_pressure = (
+                metrics["l1_pressure_vol"].item()
+                if hasattr(metrics["l1_pressure_vol"], "item")
+                else metrics["l1_pressure_vol"]
             )
-            l2_velocity_y = (
-                metrics["l2_velocity_y"].item()
-                if hasattr(metrics["l2_velocity_y"], "item")
-                else metrics["l2_velocity_y"]
+            mae_pressure = (
+                metrics["mae_pressure_vol"].item()
+                if hasattr(metrics["mae_pressure_vol"], "item")
+                else metrics["mae_pressure_vol"]
             )
-            l2_velocity_z = (
-                metrics["l2_velocity_z"].item()
-                if hasattr(metrics["l2_velocity_z"], "item")
-                else metrics["l2_velocity_z"]
+            l2_velocity = (
+                metrics["l2_velocity"].item()
+                if hasattr(metrics["l2_velocity"], "item")
+                else metrics["l2_velocity"]
             )
+            l1_velocity = (
+                metrics["l1_velocity"].item()
+                if hasattr(metrics["l1_velocity"], "item")
+                else metrics["l1_velocity"]
+            )
+            mae_velocity = (
+                metrics["mae_velocity"].item()
+                if hasattr(metrics["mae_velocity"], "item")
+                else metrics["mae_velocity"]
+            )
+            
             l2_nut = (
                 metrics["l2_nut"].item()
                 if hasattr(metrics["l2_nut"], "item")
                 else metrics["l2_nut"]
             )
+            l1_nut = (
+                metrics["l1_nut"].item()
+                if hasattr(metrics["l1_nut"], "item")
+                else metrics["l1_nut"]
+            )
+            mae_nut = (
+                metrics["mae_nut"].item()
+                if hasattr(metrics["mae_nut"], "item")
+                else metrics["mae_nut"]
+            )
 
             results.append(
                 [
                     batch_idx,
                     f"{loss:.4f}",
                     f"{l2_pressure:.4f}",
-                    f"{l2_velocity_x:.4f}",
-                    f"{l2_velocity_y:.4f}",
-                    f"{l2_velocity_z:.4f}",
+                    f"{l1_pressure:.4f}",
+                    f"{mae_pressure:.4f}",
+                    f"{l2_velocity:.4f}",
+                    f"{l1_velocity:.4f}",
+                    f"{mae_velocity:.4f}",
                     f"{l2_nut:.4f}",
+                    f"{l1_nut:.4f}",
+                    f"{mae_nut:.4f}",
                     f"{elapsed:.4f}",
                 ]
             )
 
     if cfg.datapipe.mode == "surface":
-        pred_drag_coeffs = [r[6] for r in results]
-        pred_lift_coeffs = [r[7] for r in results]
-        true_drag_coeffs = [r[8] for r in results]
-        true_lift_coeffs = [r[9] for r in results]
+        pred_drag_coeffs = [r[8] for r in results]
+        pred_lift_coeffs = [r[9] for r in results]
+        true_drag_coeffs = [r[10] for r in results]
+        true_lift_coeffs = [r[11] for r in results]
 
         # Compute the R2 scores for lift and drag:
         r2_lift = r2_score(true_lift_coeffs, pred_lift_coeffs)
@@ -470,9 +524,11 @@ def inference(cfg: DictConfig) -> None:
             "Batch",
             "Loss",
             "L2 Pressure",
-            "L2 Shear X",
-            "L2 Shear Y",
-            "L2 Shear Z",
+            "L1 Pressure",
+            "MAE Pressure",
+            "L2 Wall Shear Stress",
+            "L1 Wall Shear Stress",
+            "MAE Wall Shear Stress",
             "Predicted Drag Coefficient",
             "Pred Lift Coefficient",
             "True Drag Coefficient",
@@ -484,21 +540,37 @@ def inference(cfg: DictConfig) -> None:
         )
         logger.info(f"R2 score for lift: {r2_lift:.4f}")
         logger.info(f"R2 score for drag: {r2_drag:.4f}")
+        csv_filename = f"{cfg.output_dir}/{cfg.run_id}/surface_inference_results_{datetime.now():%Y%m%d_%H%M%S_%f}.csv"  # filesystem-safe timestamp (no spaces/colons)
+        with open(csv_filename, 'w', newline='') as f:
+            writer = csv.writer(f)
+            writer.writerow(headers)
+            writer.writerows(results)
+        logger.info(f"Results saved to {csv_filename}")
 
     elif cfg.datapipe.mode == "volume":
         headers = [
             "Batch",
             "Loss",
             "L2 Pressure",
-            "L2 Velocity X",
-            "L2 Velocity Y",
-            "L2 Velocity Z",
+            "L1 Pressure",
+            "MAE Pressure",
+            "L2 Velocity",
+            "L1 Velocity",
+            "MAE Velocity",
             "L2 Nut",
+            "L1 Nut",
+            "MAE Nut",
             "Elapsed (s)",
         ]
         logger.info(
             f"Results:\n{tabulate(results, headers=headers, tablefmt='github')}"
         )
+        csv_filename = f"{cfg.output_dir}/{cfg.run_id}/volume_inference_results_{datetime.now():%Y%m%d_%H%M%S_%f}.csv"  # filesystem-safe timestamp (no spaces/colons)
+        with open(csv_filename, 'w', newline='') as f:
+            writer = csv.writer(f)
+            writer.writerow(headers)
+            writer.writerows(results)
+        logger.info(f"Results saved to {csv_filename}")
 
     # Calculate means for each metric (skip batch index)
     if results:
diff --git a/examples/cfd/external_aerodynamics/transolver/src/metrics.py b/examples/cfd/external_aerodynamics/transformer_models/src/metrics.py
similarity index 57%
rename from examples/cfd/external_aerodynamics/transolver/src/metrics.py
rename to examples/cfd/external_aerodynamics/transformer_models/src/metrics.py
index 34dbb74aff..080526f23a 100644
--- a/examples/cfd/external_aerodynamics/transolver/src/metrics.py
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/metrics.py
@@ -101,6 +101,38 @@ def metrics_fn_volume(
     Raises:
         NotImplementedError: Always, as this function is not yet implemented.
     """
+
+    # Pressure channel (index 3) and velocity magnitude (norm over channels 0-2).
+    pressure_pred = pred[:, :, 3]
+    pressure_target = target[:, :, 3]
+
+    velocity_pred = torch.sqrt(torch.sum(pred[:, :, 0:3]**2.0, dim=2))
+    velocity_target = torch.sqrt(torch.sum(target[:, :, 0:3]**2.0, dim=2))
+
+    # L1 errors
+    l1_num = torch.abs(pred - target)
+    l1_num = torch.sum(l1_num, dim=1)
+
+    l1_denom = torch.abs(target)
+    l1_denom = torch.sum(l1_denom, dim=1)
+
+    l1 = l1_num / l1_denom
+
+    # L1 errors velocity
+    l1_num_vel = torch.abs(velocity_pred - velocity_target)
+    l1_num_vel = torch.sum(l1_num_vel)
+
+    l1_denom_vel = torch.abs(velocity_target)
+    l1_denom_vel = torch.sum(l1_denom_vel)
+
+    l1_vel = l1_num_vel / l1_denom_vel
+
+    # MAE
+    mae_num = torch.abs(pred - target)
+    mae_num_vel = torch.abs(velocity_pred - velocity_target)
+    mae_pressure = torch.abs(pressure_pred - pressure_target)
+    
+    # L2 errors
     l2_num = (pred - target) ** 2
     l2_num = torch.sum(l2_num, dim=1)
     l2_num = torch.sqrt(l2_num)
@@ -111,12 +143,36 @@ def metrics_fn_volume(
 
     l2 = l2_num / l2_denom
 
+    # L2 errors velocity
+    l2_num_vel = (velocity_pred - velocity_target) ** 2
+    l2_num_vel = torch.sum(l2_num_vel)
+    l2_num_vel = torch.sqrt(l2_num_vel)
+
+    l2_denom_vel = velocity_target**2
+    l2_denom_vel = torch.sum(l2_denom_vel)
+    l2_denom_vel = torch.sqrt(l2_denom_vel)
+
+    l2_vel = l2_num_vel / l2_denom_vel
+
     metrics = {
         "l2_pressure_vol": torch.mean(l2[:, 3]),
         "l2_velocity_x": torch.mean(l2[:, 0]),
         "l2_velocity_y": torch.mean(l2[:, 1]),
         "l2_velocity_z": torch.mean(l2[:, 2]),
         "l2_nut": torch.mean(l2[:, 4]),
+        "l1_pressure_vol": torch.mean(l1[:, 3]),
+        "l1_velocity_x": torch.mean(l1[:, 0]),
+        "l1_velocity_y": torch.mean(l1[:, 1]),
+        "l1_velocity_z": torch.mean(l1[:, 2]),
+        "l1_nut": torch.mean(l1[:, 4]),
+        "mae_pressure_vol": torch.mean(mae_pressure),
+        "mae_velocity_x": torch.mean(mae_num[:, :, 0]),
+        "mae_velocity_y": torch.mean(mae_num[:, :, 1]),
+        "mae_velocity_z": torch.mean(mae_num[:, :, 2]),
+        "mae_nut": torch.mean(mae_num[:, :, 4]),  # mae_num is un-reduced: select channel 4 on the last axis, not point 4
+        "l2_velocity": torch.mean(l2_vel),
+        "l1_velocity": torch.mean(l1_vel),
+        "mae_velocity": torch.mean(mae_num_vel),
     }
 
     return metrics
@@ -144,6 +200,36 @@ def metrics_fn_surface(
     # target = target * norm_factors["std"] + norm_factors["mean"]
     # pred = pred * norm_factors["std"] + norm_factors["mean"]
 
+    pressure_pred = pred[:, :, 0]
+    pressure_target = target[:, :, 0]
+
+    wall_shear_pred = torch.sqrt(torch.sum(pred[:, :, 1:4]**2.0, dim=2))
+    wall_shear_target = torch.sqrt(torch.sum(target[:, :, 1:4]**2.0, dim=2))
+
+    # MAE
+    mae_num = torch.abs(pred - target)
+    mae_wall_shear = torch.abs(wall_shear_pred - wall_shear_target)
+    mae_pressure = torch.abs(pressure_pred - pressure_target)
+
+    # L1 errors
+    l1_num = torch.abs(pred - target)
+    l1_num = torch.sum(l1_num, dim=1)
+
+    l1_denom = torch.abs(target)
+    l1_denom = torch.sum(l1_denom, dim=1)
+
+    l1 = l1_num / l1_denom
+
+    # L1 errors for wall shear stress
+    l1_num_ws = torch.abs(wall_shear_pred - wall_shear_target)
+    l1_num_ws = torch.sum(l1_num_ws)
+
+    l1_denom_ws = torch.abs(wall_shear_target)
+    l1_denom_ws = torch.sum(l1_denom_ws)
+
+    l1_ws = l1_num_ws / l1_denom_ws
+
+    # L2 errors
     l2_num = (pred - target) ** 2
     l2_num = torch.sum(l2_num, dim=1)
     l2_num = torch.sqrt(l2_num)
@@ -154,11 +240,33 @@ def metrics_fn_surface(
 
     l2 = l2_num / l2_denom
 
+    # L2 errors for wall shear stress
+    l2_num_ws = (wall_shear_pred - wall_shear_target) ** 2
+    l2_num_ws = torch.sum(l2_num_ws)
+    l2_num_ws = torch.sqrt(l2_num_ws)
+
+    l2_denom_ws = wall_shear_target**2
+    l2_denom_ws = torch.sum(l2_denom_ws)
+    l2_denom_ws = torch.sqrt(l2_denom_ws)
+
+    l2_ws = l2_num_ws / l2_denom_ws
+
     metrics = {
         "l2_pressure_surf": torch.mean(l2[:, 0]),
         "l2_shear_x": torch.mean(l2[:, 1]),
         "l2_shear_y": torch.mean(l2[:, 2]),
         "l2_shear_z": torch.mean(l2[:, 3]),
+        "l1_pressure_surf": torch.mean(l1[:, 0]),
+        "l1_shear_x": torch.mean(l1[:, 1]),
+        "l1_shear_y": torch.mean(l1[:, 2]),
+        "l1_shear_z": torch.mean(l1[:, 3]),
+        "mae_pressure_surf": torch.mean(mae_pressure),
+        "mae_shear_x": torch.mean(mae_num[:, :, 1]),
+        "mae_shear_y": torch.mean(mae_num[:, :, 2]),
+        "mae_shear_z": torch.mean(mae_num[:, :, 3]),
+        "l2_wall_shear_stress": torch.mean(l2_ws),
+        "l1_wall_shear_stress": torch.mean(l1_ws),
+        "mae_wall_shear_stress": torch.mean(mae_wall_shear),
     }
 
     return metrics
diff --git a/examples/cfd/external_aerodynamics/transolver/src/preprocess.py b/examples/cfd/external_aerodynamics/transformer_models/src/preprocess.py
similarity index 100%
rename from examples/cfd/external_aerodynamics/transolver/src/preprocess.py
rename to examples/cfd/external_aerodynamics/transformer_models/src/preprocess.py
diff --git a/examples/cfd/external_aerodynamics/transolver/src/surface_fields_normalization.npz b/examples/cfd/external_aerodynamics/transformer_models/src/surface_fields_normalization.npz
similarity index 100%
rename from examples/cfd/external_aerodynamics/transolver/src/surface_fields_normalization.npz
rename to examples/cfd/external_aerodynamics/transformer_models/src/surface_fields_normalization.npz
diff --git a/examples/cfd/external_aerodynamics/transolver/src/train.py b/examples/cfd/external_aerodynamics/transformer_models/src/train.py
similarity index 100%
rename from examples/cfd/external_aerodynamics/transolver/src/train.py
rename to examples/cfd/external_aerodynamics/transformer_models/src/train.py
diff --git a/examples/cfd/external_aerodynamics/transolver/src/utils.py b/examples/cfd/external_aerodynamics/transformer_models/src/utils.py
similarity index 100%
rename from examples/cfd/external_aerodynamics/transolver/src/utils.py
rename to examples/cfd/external_aerodynamics/transformer_models/src/utils.py
diff --git a/examples/cfd/external_aerodynamics/transolver/src/volume_fields_normalization.npz b/examples/cfd/external_aerodynamics/transformer_models/src/volume_fields_normalization.npz
similarity index 100%
rename from examples/cfd/external_aerodynamics/transolver/src/volume_fields_normalization.npz
rename to examples/cfd/external_aerodynamics/transformer_models/src/volume_fields_normalization.npz
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/combined.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/combined.yaml
deleted file mode 100644
index e4bcbd16c1..0000000000
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/combined.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# You may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-defaults:
-  -  core
-
-# Overrides for combined data:
-mode: combined
-
-# combined-speficic needs:
-data_keys:
-  - "volume_fields"
-  - "volume_mesh_centers"
-  - "surface_fields"
-  - "surface_mesh_centers"
-  - "surface_normals"
-  - "surface_areas"
-  - "air_density"
-  - "stream_velocity"
-  - "stl_faces"
-  - "stl_centers"
-  - "stl_coordinates"
\ No newline at end of file
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/core.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/core.yaml
deleted file mode 100644
index 67adcb45f6..0000000000
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/core.yaml
+++ /dev/null
@@ -1,60 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# You may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Paths to your data:
-train:
-  data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/train/
-val:
-  data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/val/
-
-# You can set a normalization factor directory:
-normalization_dir: "src/"
-
-# How many events in advance should we be preloading?
-preload_depth: 1
-
-# Pin memory for GPU transfers?
-pin_memory: true
-
-# Sampling resolution of the point clouds:
-resolution: 200_000
-
-# Surface / Volume / (combined, if supported)
-mode: ???
-
-# For building embeddings: include normal directions for each point?
-include_normals: true
-# Include SDF?  (It's 0 for surface data...)
-include_sdf: true
-# Apply translation invariance via center-of-mass subtraction?
-translational_invariance: true
-# Rescale x/y/z inputs to the model for scale invariance?
-scale_invariance: true
-reference_scale: [12.0, 4.5, 3.25]
-
-# Which parts of the data files to read?  No need to read everything, all the time.
-data_keys: ???
-
-# Load and return the STL geometry info in the dataloader?
-include_geometry: false
-
-# Broadcast global features to the same resolution as points?
-broadcast_global_features: true
-
-# Return the mesh areas and normals?  You don't usually want this for training.
-# We switch it on automatically for inference.
-return_mesh_features: false
-
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/surface.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/surface.yaml
deleted file mode 100644
index 8363a7a9dc..0000000000
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/datapipe/surface.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# You may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-defaults:
-  -  core
-
-# Overrides for surface data:
-mode: surface
-
-# Surface-speficic needs:
-data_keys:
-  - "surface_fields"
-  - "surface_mesh_centers"
-  - "surface_normals"
-  - "surface_areas"
-  - "air_density"
-  - "stream_velocity"
-  - "stl_faces"
-  - "stl_centers"
-  - "stl_coordinates"
\ No newline at end of file
diff --git a/examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml b/examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml
deleted file mode 100644
index 2745a00ab6..0000000000
--- a/examples/cfd/external_aerodynamics/transolver/src/conf/model/typhon.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-_target_: physicsnemo.experimental.models.typhon.Typhon
-functional_dim: 6
-global_dim: 2
-geometry_dim: 3
-out_dim: 4
-n_layers: 14
-n_hidden: 256
-dropout: 0.0
-n_head: 8
-act: "gelu"
-mlp_ratio: 2
-slice_num: 128
-use_te: false
-plus: false
-include_local_features: true # use local features
-radii: [0.05, 0.25, 1.0, 2.5] # radius for local features
-neighbors_in_radius: [8, 32, 64, 128] # neighbors in radius for local features
-n_hidden_local: 32 # hidden dimension for local features
-
diff --git a/examples/cfd/external_aerodynamics/transolver/src/inference_on_zarr.py b/examples/cfd/external_aerodynamics/transolver/src/inference_on_zarr.py
deleted file mode 100644
index d2e3498c76..0000000000
--- a/examples/cfd/external_aerodynamics/transolver/src/inference_on_zarr.py
+++ /dev/null
@@ -1,518 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from pathlib import Path
-
-import numpy as np
-import torch
-import torchinfo
-import typing
-import collections
-from typing import Literal
-
-import hydra
-import omegaconf
-from omegaconf import DictConfig
-from physicsnemo.models.transolver.transolver import Transolver
-from physicsnemo.utils import load_checkpoint
-from physicsnemo.utils.logging import PythonLogger, RankZeroLoggingWrapper
-
-from sklearn.metrics import r2_score
-
-from physicsnemo.distributed import DistributedManager
-
-import time
-
-from physicsnemo.datapipes.cae.transolver_datapipe import (
-    create_transolver_dataset,
-    TransolverDataPipe,
-)
-from train import forward_pass
-from tabulate import tabulate
-
-# import transformer_engine.pytorch as te
-# from transformer_engine.common.recipe import Format, DelayedScaling
-from torch.amp import autocast
-from contextlib import nullcontext
-
-from train import (
-    get_autocast_context,
-    pad_input_for_fp8,
-    unpad_output_for_fp8,
-    update_model_params_for_fp8,
-)
-
-# torch.serialization.add_safe_globals([omegaconf.listconfig.ListConfig])
-# torch.serialization.add_safe_globals([omegaconf.base.ContainerMetadata])
-# torch.serialization.add_safe_globals([typing.Any])
-# torch.serialization.add_safe_globals([list])
-# torch.serialization.add_safe_globals([collections.defaultdict])
-# torch.serialization.add_safe_globals([dict])
-# torch.serialization.add_safe_globals([int])
-# torch.serialization.add_safe_globals([omegaconf.nodes.AnyNode])
-# torch.serialization.add_safe_globals([omegaconf.base.Metadata])
-
-
-@torch.no_grad()
-def compute_force_coefficients(
-    normals: torch.Tensor,
-    area: torch.Tensor,
-    coeff: float,
-    p: torch.Tensor,
-    wss: torch.Tensor,
-    force_direction: torch.Tensor = np.array([1, 0, 0]),
-):
-    """
-    Computes force coefficients for a given mesh. Output includes the pressure and skin
-    friction components. Can be used to compute lift and drag.
-    For drag, use the `force_direction` as the direction of the motion,
-    e.g. [1, 0, 0] for flow in x direction.
-    For lift, use the `force_direction` as the direction perpendicular to the motion,
-    e.g. [0, 1, 0] for flow in x direction and weight in y direction.
-
-    Parameters:
-    -----------
-    normals: torch.Tensor
-        The surface normals on cells of the mesh
-    area: torch.Tensor
-        The surface areas of each cell
-    coeff: float
-        Reciprocal of dynamic pressure times the frontal area, i.e. 2/(A * rho * U^2)
-    p: torch.Tensor
-        Pressure distribution on the mesh (on each cell)
-    wss: torch.Tensor
-        Wall shear stress distribution on the mesh (on each cell)
-    force_direction: torch.Tensor
-        Direction to compute the force, default is np.array([1, 0, 0])
-
-    Returns:
-    --------
-    c_total: float
-        Computed total force coefficient
-    c_p: float
-        Computed pressure force coefficient
-    c_f: float
-        Computed skin friction coefficient
-    """
-
-    # Compute coefficients
-    c_p = coeff * torch.sum(torch.sum(normals * force_direction, dim=-1) * area * p)
-    c_f = -coeff * torch.sum(torch.sum(wss * force_direction, dim=-1) * area)
-
-    # Compute total force coefficients
-    c_total = c_p + c_f
-
-    return c_total, c_p, c_f
-
-
-def batched_inference_loop(
-    batch: dict,
-    model: torch.nn.Module,
-    precision: str,
-    data_mode: Literal["surface", "volume"],
-    batch_resolution: int,
-    output_pad_size: int | None,
-    dist_manager: DistributedManager,
-    datapipe: TransolverDataPipe,
-) -> tuple[float, dict, tuple[torch.Tensor, torch.Tensor]]:
-    N = batch["embeddings"].shape[1]
-    # This generates a random ordering of the input points,
-    # Which we'll then slice up into inputs to the model.
-    indices = torch.randperm(N, device=batch["fx"].device)
-
-    index_blocks = torch.split(indices, batch_resolution)
-
-    global_preds_targets = []
-    global_weight = 0.0
-    start = time.time()
-    for i, index_block in enumerate(index_blocks):
-        # We compute the local_batch by slicing from embeddings and fields:
-        local_embeddings = batch["embeddings"][:, index_block]
-        local_fields = batch["fields"][:, index_block]
-
-        # fx does not need to be sliced for TransolverX:
-        if "geometry" not in batch.keys():
-            local_fx = batch["fx"][:, index_block]
-        else:
-            local_fx = batch["fx"]
-
-        local_batch = {
-            "fx": local_fx,
-            "embeddings": local_embeddings,
-            "fields": local_fields,
-        }
-
-        if "air_density" in batch.keys() and "stream_velocity" in batch.keys():
-            local_batch["air_density"] = batch["air_density"]
-            local_batch["stream_velocity"] = batch["stream_velocity"]
-
-        if "geometry" in batch.keys():
-            local_batch["geometry"] = batch["geometry"]
-
-        # Run the forward inference pass:
-        local_loss, local_metrics, local_preds_targets = forward_pass(
-            local_batch,
-            model,
-            precision,
-            output_pad_size,
-            dist_manager,
-            data_mode,
-            datapipe,
-        )
-
-        # Accumulate the loss and metrics:
-        # (Still on the GPU)
-        weight = index_block.shape[0] / N
-        global_weight += weight
-        if i == 0:
-            metrics = {k: local_metrics[k] * weight for k in local_metrics.keys()}
-            loss = local_loss * weight
-        else:
-            metrics = {
-                k: metrics[k] + local_metrics[k] * weight for k in metrics.keys()
-            }
-            loss += local_loss * weight
-
-        global_preds_targets.append(local_preds_targets)
-
-        end = time.time()
-        elapsed = end - start
-        print(
-            f"Completed sub-batch {i} of {len(index_blocks)} in {elapsed:.4f} seconds"
-        )
-        start = end
-
-    # Now, compute the overall loss, metrics, and coefficients:
-    metrics = {k: v / global_weight for k, v in metrics.items()}
-    loss = loss / global_weight
-
-    global_predictions = torch.cat([l[0] for l in global_preds_targets], dim=1)
-    global_targets = torch.cat([l[1] for l in global_preds_targets], dim=1)
-
-    # Now, we have to *unshuffle* the prediction to the original index
-    inverse_indices = torch.empty_like(indices)
-    inverse_indices[indices] = torch.arange(indices.size(0), device=indices.device)
-    # Suppose prediction is of shape [batch, N, ...]
-    global_predictions = global_predictions[:, inverse_indices]
-    global_targets = global_targets[:, inverse_indices]
-    return loss, metrics, (global_predictions, global_targets)
-
-
-def inference(cfg: DictConfig) -> None:
-    """
-    Run inference on a validation Zarr dataset using a trained Transolver model.
-
-    Args:
-        cfg (DictConfig): Hydra configuration object containing model, data, and training settings.
-
-    Returns:
-        None
-    """
-    DistributedManager.initialize()
-
-    dist_manager = DistributedManager()
-
-    logger = RankZeroLoggingWrapper(PythonLogger(name="training"), dist_manager)
-
-    cfg, output_pad_size = update_model_params_for_fp8(cfg, logger)
-
-    logger.info(f"Config:\n{omegaconf.OmegaConf.to_yaml(cfg, resolve=True)}")
-
-    # Set up model
-    model = hydra.utils.instantiate(cfg.model)
-    logger.info(f"\n{torchinfo.summary(model, verbose=0)}")
-
-    if cfg.checkpoint_dir is not None:
-        checkpoint_dir = cfg.checkpoint_dir
-    else:
-        checkpoint_dir = f"{cfg.output_dir}/{cfg.run_id}/checkpoints"
-
-    ckpt_args = {
-        "path": checkpoint_dir,
-        "models": model,
-    }
-
-    loaded_epoch = load_checkpoint(device=dist_manager.device, **ckpt_args)
-    logger.info(f"loaded epoch: {loaded_epoch}")
-    model.to(dist_manager.device)
-
-    num_params = sum(p.numel() for p in model.parameters())
-    logger.info(f"Number of parameters: {num_params}")
-
-    # Load the normalization file from configured directory (defaults to current dir)
-    norm_dir = getattr(cfg.data, "normalization_dir", ".")
-    if cfg.data.mode == "surface":
-        norm_file = str(Path(norm_dir) / "surface_fields_normalization.npz")
-    elif cfg.data.mode == "volume":
-        norm_file = str(Path(norm_dir) / "volume_fields_normalization.npz")
-
-    norm_data = np.load(norm_file)
-    norm_factors = {
-        "mean": torch.from_numpy(norm_data["mean"]).to(dist_manager.device),
-        "std": torch.from_numpy(norm_data["std"]).to(dist_manager.device),
-    }
-
-    if cfg.compile:
-        model = torch.compile(model, dynamic=True)
-    model.eval()
-
-    # For INFERENCE, we deliberately set the resolution in the data pipe to NONE
-    # so there is not downsampling.  We still batch it in the inference script
-    # for memory usage constraints.
-
-    batch_resolution = cfg.data.resolution
-    cfg.data.resolution = None
-    ## Make sure to read the whole data sample for volume:
-    if cfg.data.mode == "volume":
-        cfg.data.volume_sample_from_disk = False
-
-    # And we need the mesh features for drag, lift in surface data:
-    if cfg.data.mode == "surface":
-        cfg.data.return_mesh_features = True
-
-    # Validation dataset
-    val_dataset = create_transolver_dataset(
-        cfg.data,
-        phase="val",
-        scaling_factors=norm_factors,
-    )
-
-    results = []
-    start = time.time()
-    for batch_idx, batch in enumerate(val_dataset):
-        with torch.no_grad():
-            loss, metrics, (global_predictions, global_targets) = (
-                batched_inference_loop(
-                    batch,
-                    model,
-                    cfg.precision,
-                    cfg.data.mode,
-                    batch_resolution,
-                    output_pad_size,
-                    dist_manager,
-                    val_dataset,
-                )
-            )
-        end = time.time()
-        elapsed = end - start
-        logger.info(f"Finished batch {batch_idx} in {elapsed:.4f} seconds")
-        start = time.time()
-
-        if cfg.data.mode == "surface":
-            coeff = 1.0
-
-            # Compute the drag and loss coefficients:
-            # (Index on [0] is to remove the 1 batch index)
-            pred_pressure, pred_shear = torch.split(
-                global_predictions[0], (1, 3), dim=-1
-            )
-
-            pred_pressure = pred_pressure.reshape(-1)
-            pred_drag_coeff, _, _ = compute_force_coefficients(
-                batch["surface_normals"][0],
-                batch["surface_areas"],
-                coeff,
-                pred_pressure,
-                pred_shear,
-                torch.tensor([[1, 0, 0]], device=dist_manager.device),
-            )
-
-            pred_lift_coeff, _, _ = compute_force_coefficients(
-                batch["surface_normals"][0],
-                batch["surface_areas"],
-                coeff,
-                pred_pressure,
-                pred_shear,
-                torch.tensor([[0, 0, 1]], device=dist_manager.device),
-            )
-
-            # air_density = batch["air_density"] if "air_density" in batch.keys() else None
-            # stream_velocity = batch["stream_velocity"] if "stream_velocity" in batch.keys() else None
-            # true_fields = val_dataset.unscale_model_targets(batch["fields"], air_density=air_density, stream_velocity=stream_velocity)
-            true_pressure, true_shear = torch.split(global_targets[0], (1, 3), dim=-1)
-
-            true_pressure = true_pressure.reshape(-1)
-            true_drag_coeff, _, _ = compute_force_coefficients(
-                batch["surface_normals"][0],
-                batch["surface_areas"],
-                coeff,
-                true_pressure,
-                true_shear,
-                torch.tensor([[1, 0, 0]], device=dist_manager.device),
-            )
-
-            true_lift_coeff, _, _ = compute_force_coefficients(
-                batch["surface_normals"][0],
-                batch["surface_areas"],
-                coeff,
-                true_pressure,
-                true_shear,
-                torch.tensor([[0, 0, 1]], device=dist_manager.device),
-            )
-
-            pred_lift_coeff = pred_lift_coeff.item()
-            pred_drag_coeff = pred_drag_coeff.item()
-
-            # Extract metric values and convert tensors to floats
-            l2_pressure = (
-                metrics["l2_pressure_surf"].item()
-                if hasattr(metrics["l2_pressure_surf"], "item")
-                else metrics["l2_pressure_surf"]
-            )
-            l2_shear_x = (
-                metrics["l2_shear_x"].item()
-                if hasattr(metrics["l2_shear_x"], "item")
-                else metrics["l2_shear_x"]
-            )
-            l2_shear_y = (
-                metrics["l2_shear_y"].item()
-                if hasattr(metrics["l2_shear_y"], "item")
-                else metrics["l2_shear_y"]
-            )
-            l2_shear_z = (
-                metrics["l2_shear_z"].item()
-                if hasattr(metrics["l2_shear_z"], "item")
-                else metrics["l2_shear_z"]
-            )
-
-            results.append(
-                [
-                    batch_idx,
-                    f"{loss:.4f}",
-                    f"{l2_pressure:.4f}",
-                    f"{l2_shear_x:.4f}",
-                    f"{l2_shear_y:.4f}",
-                    f"{l2_shear_z:.4f}",
-                    f"{pred_drag_coeff:.4f}",
-                    f"{pred_lift_coeff:.4f}",
-                    f"{true_drag_coeff:.4f}",
-                    f"{true_lift_coeff:.4f}",
-                    f"{elapsed:.4f}",
-                ]
-            )
-
-        elif cfg.data.mode == "volume":
-            # Extract metric values and convert tensors to floats
-            l2_pressure = (
-                metrics["l2_pressure_vol"].item()
-                if hasattr(metrics["l2_pressure_vol"], "item")
-                else metrics["l2_pressure_vol"]
-            )
-            l2_velocity_x = (
-                metrics["l2_velocity_x"].item()
-                if hasattr(metrics["l2_velocity_x"], "item")
-                else metrics["l2_velocity_x"]
-            )
-            l2_velocity_y = (
-                metrics["l2_velocity_y"].item()
-                if hasattr(metrics["l2_velocity_y"], "item")
-                else metrics["l2_velocity_y"]
-            )
-            l2_velocity_z = (
-                metrics["l2_velocity_z"].item()
-                if hasattr(metrics["l2_velocity_z"], "item")
-                else metrics["l2_velocity_z"]
-            )
-            l2_nut = (
-                metrics["l2_nut"].item()
-                if hasattr(metrics["l2_nut"], "item")
-                else metrics["l2_nut"]
-            )
-
-            results.append(
-                [
-                    batch_idx,
-                    f"{loss:.4f}",
-                    f"{l2_pressure:.4f}",
-                    f"{l2_velocity_x:.4f}",
-                    f"{l2_velocity_y:.4f}",
-                    f"{l2_velocity_z:.4f}",
-                    f"{l2_nut:.4f}",
-                    f"{elapsed:.4f}",
-                ]
-            )
-
-    if cfg.data.mode == "surface":
-        pred_drag_coeffs = [r[6] for r in results]
-        pred_lift_coeffs = [r[7] for r in results]
-        true_drag_coeffs = [r[8] for r in results]
-        true_lift_coeffs = [r[9] for r in results]
-
-        # Compute the R2 scores for lift and drag:
-        r2_lift = r2_score(true_lift_coeffs, pred_lift_coeffs)
-        r2_drag = r2_score(true_drag_coeffs, pred_drag_coeffs)
-
-        headers = [
-            "Batch",
-            "Loss",
-            "L2 Pressure",
-            "L2 Shear X",
-            "L2 Shear Y",
-            "L2 Shear Z",
-            "Predicted Drag Coefficient",
-            "Pred Lift Coefficient",
-            "True Drag Coefficient",
-            "True Lift Coefficient",
-            "Elapsed (s)",
-        ]
-        logger.info(
-            f"Results:\n{tabulate(results, headers=headers, tablefmt='github')}"
-        )
-        logger.info(f"R2 score for lift: {r2_lift:.4f}")
-        logger.info(f"R2 score for drag: {r2_drag:.4f}")
-
-    elif cfg.data.mode == "volume":
-        headers = [
-            "Batch",
-            "Loss",
-            "L2 Pressure",
-            "L2 Velocity X",
-            "L2 Velocity Y",
-            "L2 Velocity Z",
-            "L2 Nut",
-            "Elapsed (s)",
-        ]
-        logger.info(
-            f"Results:\n{tabulate(results, headers=headers, tablefmt='github')}"
-        )
-
-    # Calculate means for each metric (skip batch index)
-    if results:
-        # Convert string values back to float for mean calculation
-        arr = np.array(results)[:, 1:].astype(float)
-        means = arr.mean(axis=0)
-        mean_row = ["Mean"] + [f"{m:.4f}" for m in means]
-        logger.info(
-            f"Summary:\n{tabulate([mean_row], headers=headers, tablefmt='github')}"
-        )
-
-
-@hydra.main(version_base=None, config_path="conf", config_name="train_surface")
-def launch(cfg: DictConfig) -> None:
-    """
-    Launch inference with Hydra configuration.
-
-    Args:
-        cfg (DictConfig): Hydra configuration object.
-
-    Returns:
-        None
-    """
-    inference(cfg)
-
-
-if __name__ == "__main__":
-    launch()
diff --git a/examples/cfd/external_aerodynamics/typhon/README.md b/examples/cfd/external_aerodynamics/typhon/README.md
deleted file mode 100644
index d6dd7046d8..0000000000
--- a/examples/cfd/external_aerodynamics/typhon/README.md
+++ /dev/null
@@ -1,284 +0,0 @@
-<!-- markdownlint-disable -->
-# `Transolver` for External Aerodynamics on Irregular Meshes
-
-This example is an end to end training recipe for the `Transolver` model, which can
-be run on surface or volume data.
-
-`Transolver` is a high-performance surrogate model for CFD solvers. The Transolver model
-adapts the Attention mechanism, encouraging the learning of meaningful representations.
-In each PhysicsAttention layer, input points are projected onto state vectors through
-learnable transformations and weights. These transformations are then used to compute
-self-attention among all state vectors, and the same weights are reused to project
-states back to each input point.
-
-## External Aerodynamics CFD Example: Overview
-
-This directory contains the essential components for training and evaluating a
-model tailored to external aerodynamics CFD problems built on `Transolver`.
-
-By stacking multiple PhysicsAttention layers, the `Transolver` model learns to map from
-the functional input space to the output space with high fidelity. The PhysicsNeMo
-implementation closely follows the original Transolver architecture
-([https://github.com/thuml/Transolver](https://github.com/thuml/Transolver)), but
-introduces modifications for improved numerical stability and compatibility with NVIDIA
-TransformerEngine.
-
-The training example for Transolver uses the [DrivaerML dataset](https://caemldatasets.org/drivaerml/).
-
-As a concrete example, we are training external aerodynamics surrogate models for automobiles.
-`Transolver` takes as input a point cloud on the surface or surrounding the surface,
-iteratively processing it with PhysicsAttention to produce high-fidelity predictions.
-
-## Requirements
-
-Transolver can use TransformerEngine from NVIDIA, as well as tensorstore (for IO),
-zarr, einops and a few other python packages.  Install them with `pip install -r requirements.txt`
-as well as physicsnemo 25.11 or higher.
-
-## Using Transolver for External Aerodynamics
-
-1. Prepare the Dataset.  Transolver uses the same Zarr outputs as other models with DrivaerML.
-`PhysicsNeMo` has a related project to help with data processing, called [PhysicsNeMo-Curator](https://github.com/NVIDIA/physicsnemo-curator).
-Using `PhysicsNeMo-Curator`, the data needed to train can be setup easily.
-Please refer to [these instructions on getting started](https://github.com/NVIDIA/physicsnemo-curator?tab=readme-ov-file#what-is-physicsnemo-curator)
-with `PhysicsNeMo-Curator`.  For specifics of preparing the dataset for this example,
-see the [download](https://github.com/NVIDIA/physicsnemo-curator/blob/main/examples/external_aerodynamics/domino/README.md#download-drivaerml-dataset)
-and [preprocessing](https://github.com/NVIDIA/physicsnemo-curator/blob/main/examples/external_aerodynamics/domino/README.md)
-instructions from `physicsnemo-curator`.  Users should apply the
-preprocessing steps locally to produce `zarr` output files.
-
-2. Train your model.  The model and training configuration is configured with
-`hydra`, and two configurations are available: `transolver_surface` and `transolver_volume`.
-Find configurations in `src/conf`, where you can control both network properties
-and training properties. See below for an overview and explanation of key
-parameters that may be of special interest.
-
-3. Use the trained model to perform inference.  This example contains two
-inference examples: one for inference on the validation set, already in
-Zarr format.  The `.vtp` inference pipeline is being updated to accomodate Transolver.
-
-The following sections contain further details on the training and inference
-recipe.
-
-## Model Training
-
-To train the model, first we compute normalization factors on the dataset to
-make the predictive quantities output in a well-defined range. The included
-script, `compute_normalizations.py`, will compute the normalization
-factors.  Once run, it should save to an output file similar to
-"surface_fields_normalization.npz".  This will get loaded during training.
-The normalization file location can be configured via `data.normalization_dir`
-in the training configuration (defaults to current directory).
-
-> By default, the normalization sets the mean to 0.0 and std to 1.0 of all labels
-> in the dataset, computing the mean across the train dataset.  You could adapt
-> this to a different normalization, however take care to update both the
-> preprocessing as well as inference scripts.  Min/Max is another popular strategy.
-
-To configure your training run, use `hydra`.  The
-config contains sections for the model, data, optimizer, and training settings.
-For details on the model parameters, see the API for `physicsnemo.models.transolver`.
-
-To fit the training into memory, you can apply on-the-fly downsampling to the data
-with `data.resolution=N`, where `N` is how many points per GPU to use.  This dataloader
-will yield the full data examples in shapes of `[1, K, f]` where `K` is the resolution
-of the mesh, and `f` is the feature space (3 for points, normals, etc.  4 for surface
-fields).  Downsampling happens in the preprocessing pipeline.
-
-During training, the configuration uses a flat learning rate that decays every 100
-epochs, and bfloat16 format by default.  The scheduler and learning rate
-may be configured.  
-
-The Optimizer for this training is the `Muon` optimizer - available only in
-`pytorch>=2.9.0`.  While not strictly required, we have found the `muon` optimizer
-performs substantially better on these architectures than standard `AdamW` and
-a oneCycle schedule.
-
-### Training Precision
-
-Transolver, as a transformer-like architecture, has support for NVIDIA's
-[TransformerEngine](https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/index.html)
-built in.  You can enable/disable the transformer engine path in the model with
-`model.use_te=[True | False]`.  Available precisions for training with `transformer_engine`
-are `training.precision=["float32" | "float16" | "bfloat16" | "float8" ]`.  In `float8`
-precision, the TransformerEngine Hybrid recipe is used for casting weights and inputs
-in the forward and backwards passes.  For more details on `float8` precision, see
-the fp8 guide from
-[TransformerEngine](https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/examples/fp8_primer.html).
-When using fp8, the training script will automatically pad and unpad the input and output,
-respectively, to use the fp8 hardware correctly.
-
-> **Float8** precisions are only available on GPUs with fp8 tensorcore support, such
-> as Hopper, Blackwell, Ada Lovelace, and others.
-
-### Other Configuration Settings
-
-Several other important configuration settings are available:
-
-- `checkpoint_dir` sets the directory for saving model checkpoints (defaults to `output_dir`
-if not specified), allowing separation of checkpoints from other outputs.
-- `compile` will use `torch.compile` for optimized performance.  It is not
-compatible with `transformer_engine` (`model.use_te=True`).  If TransformerEngine is
-not used, and half precision is, `torch.compile` is recommended for improved performance.
-- `training.num_epochs` controls the total number of epochs used during training.
-- `training.save_interval` will dictate how often the model weights and training
-tools are checkpointed.
-
-> **Note** Like other parameters of the model, changing the value of `model.use_te`
-> will make checkpoints incompatible.
-
-The training script supports data-parallel training via PyTorch DDP.  In a future
-update, we may enable domain parallelism via FSDP and ShardTensor.
-
-The script can be launched on a single GPU with, for example,
-
-```bash
-python train.py --config-name transolver_surface
-```
-
-or, for multi-GPU training, use `torchrun` or other distributed job launch tools.
-
-Example output for one epoch of the script, in an 8 GPU run, looks like:
-
-```default
-[2025-07-17 14:27:36,040][training][INFO] - Epoch 47 [0/54] Loss: 0.117565 Duration: 0.78s
-[2025-07-17 14:27:36,548][training][INFO] - Epoch 47 [1/54] Loss: 0.109625 Duration: 0.51s
-[2025-07-17 14:27:37,048][training][INFO] - Epoch 47 [2/54] Loss: 0.122574 Duration: 0.50s
-[2025-07-17 14:27:37,556][training][INFO] - Epoch 47 [3/54] Loss: 0.125667 Duration: 0.51s
-[2025-07-17 14:27:38,063][training][INFO] - Epoch 47 [4/54] Loss: 0.101863 Duration: 0.51s
-[2025-07-17 14:27:38,547][training][INFO] - Epoch 47 [5/54] Loss: 0.113324 Duration: 0.48s
-[2025-07-17 14:27:39,054][training][INFO] - Epoch 47 [6/54] Loss: 0.115478 Duration: 0.51s
-...[remove for brevity]...
-[2025-07-17 14:28:00,662][training][INFO] - Epoch 47 [49/54] Loss: 0.107935 Duration: 0.49s
-[2025-07-17 14:28:01,178][training][INFO] - Epoch 47 [50/54] Loss: 0.100087 Duration: 0.52s
-[2025-07-17 14:28:01,723][training][INFO] - Epoch 47 [51/54] Loss: 0.097733 Duration: 0.55s
-[2025-07-17 14:28:02,194][training][INFO] - Epoch 47 [52/54] Loss: 0.116489 Duration: 0.47s
-[2025-07-17 14:28:02,605][training][INFO] - Epoch 47 [53/54] Loss: 0.104865 Duration: 0.41s
-
-Epoch 47 Average Metrics:
-+-------------+---------------------+
-|   Metric    |    Average Value    |
-+-------------+---------------------+
-| l2_pressure | 0.20262257754802704 |
-| l2_shear_x  | 0.2623567283153534  |
-| l2_shear_y  | 0.35603201389312744 |
-| l2_shear_z  | 0.38965049386024475 |
-+-------------+---------------------+
-
-[2025-07-17 14:28:02,834][training][INFO] - Val [0/6] Loss: 0.114801 Duration: 0.22s
-[2025-07-17 14:28:03,074][training][INFO] - Val [1/6] Loss: 0.111632 Duration: 0.24s
-[2025-07-17 14:28:03,309][training][INFO] - Val [2/6] Loss: 0.105342 Duration: 0.23s
-[2025-07-17 14:28:03,537][training][INFO] - Val [3/6] Loss: 0.111033 Duration: 0.23s
-[2025-07-17 14:28:03,735][training][INFO] - Val [4/6] Loss: 0.099963 Duration: 0.20s
-[2025-07-17 14:28:03,903][training][INFO] - Val [5/6] Loss: 0.092340 Duration: 0.17s
-
-Epoch 47 Validation Average Metrics:
-+-------------+---------------------+
-|   Metric    |    Average Value    |
-+-------------+---------------------+
-| l2_pressure | 0.19346082210540771 |
-| l2_shear_x  | 0.26041051745414734 |
-| l2_shear_y  | 0.3589216470718384  |
-| l2_shear_z  |  0.370105117559433  |
-+-------------+---------------------+
-```
-
-## Dataset Inference
-
-The validation dataset in Zarr format can be loaded, processed, and the L2
-metrics summarized in `inference_on_zarr.py`.  For surface data, this script will also
-compute the drag and lift coefficients and the R^2 correlation of the predictions.
-
-To run inference on surface data, it's necessary to add a line to your launch command:
-
-```
-python src/inference_on_zarr.py --config-name transolver_surface run_id=/path/to/model/
-
-```
-
-The `data.return_mesh_features` flag can also be set in the config file.  It is
-disabled for training but necessary for inference.  The model path should be
-the folder containing your saved checkpoints.
-
-
-To ensure correct calculation of drag and lift, and accurate overall metrics,
-the inference script will chunk a full-resolution training example into batches,
-and stitch the outputs together at the end.  Output will appear as a table
-with all metrics for that mode, for example:
-
-```
-|   Batch |   Loss |   L2 Pressure |   L2 Shear X |   L2 Shear Y |   L2 Shear Z |   Predicted Drag Coefficient |   Pred Lift Coefficient |   True Drag Coefficient |   True Lift Coefficient |   Elapsed (s) |
-|---------|--------|---------------|--------------|--------------|--------------|------------------------------|-------------------------|-------------------------|-------------------------|---------------|
-|       0 | 0.0188 |        0.0491 |       0.0799 |       0.1023 |       0.1174 |                      488.075 |                140.365  |                 475.534 |                135.944  |        8.1281 |
-|       1 | 0.0144 |        0.045  |       0.0659 |       0.0955 |       0.107  |                      404.472 |                 21.8897 |                 406.484 |                 35.6202 |        0.7348 |
-|       2 | 0.0239 |        0.0505 |       0.0835 |       0.1101 |       0.1592 |                      383.219 |                 41.973  |                 373.999 |                 43.7198 |        1.6722 |
-|       3 | 0.0255 |        0.0526 |       0.088  |       0.1151 |       0.1305 |                      576.671 |                230.185  |                 579.655 |                210.01   |        1.4369 |
-|       4 | 0.0214 |        0.0498 |       0.0849 |       0.109  |       0.1229 |                      451.478 |                -45.3076 |                 447.109 |                -36.7298 |        1.8973 |
-|       5 | 0.0147 |        0.0402 |       0.0671 |       0.0923 |       0.0992 |                      419.76  |                -87.7945 |                 424.63  |                -83.8417 |        1.7255 |
-|       6 | 0.0171 |        0.0463 |       0.0742 |       0.1016 |       0.126  |                      350.877 |                -32.1908 |                 338.721 |                -25.5008 |        1.3738 |
-|       7 | 0.0248 |        0.0596 |       0.0989 |       0.123  |       0.1299 |                      420.122 |                -42.3073 |                 420.772 |                -16.9301 |        1.9126 |
-|       8 | 0.0178 |        0.0453 |       0.0736 |       0.1021 |       0.118  |                      380.704 |                -90.6937 |                 374.134 |                -87.2395 |        1.8081 |
-|       9 | 0.0297 |        0.0629 |       0.1004 |       0.1245 |       0.1418 |                      400.315 |               -149.927  |                 396.178 |               -147.33   |        1.6693 |
-|      10 | 0.0303 |        0.0674 |       0.0978 |       0.1233 |       0.1455 |                      602.585 |                249.985  |                 588.987 |                237.999  |        1.6581 |
-|      11 | 0.0188 |        0.0514 |       0.0772 |       0.1006 |       0.1114 |                      593.366 |                155.859  |                 590.833 |                167.067  |        1.6914 |
-|      12 | 0.0147 |        0.0436 |       0.0681 |       0.0929 |       0.1009 |                      457.252 |                 77.7093 |                 449.866 |                 77.2836 |        1.734  |
-|      13 | 0.0226 |        0.0529 |       0.0902 |       0.1092 |       0.1319 |                      374.561 |                -88.923  |                 372.675 |               -101.469  |        1.3918 |
-|      14 | 0.0186 |        0.0591 |       0.0758 |       0.1056 |       0.1199 |                      516.445 |                275.197  |                 512.238 |                274.633  |        1.7587 |
-|      15 | 0.0145 |        0.0443 |       0.0691 |       0.0974 |       0.1083 |                      397.664 |                 44.4129 |                 395.376 |                 31.417  |        1.6531 |
-|      16 | 0.019  |        0.0502 |       0.0828 |       0.1028 |       0.1145 |                      502.079 |                 75.96   |                 501.056 |                 77.4457 |        1.6815 |
-|      17 | 0.0155 |        0.0459 |       0.0721 |       0.1003 |       0.1064 |                      472.191 |                138.568  |                 460.808 |                139.42   |        1.7288 |
-|      18 | 0.0186 |        0.0549 |       0.0783 |       0.1074 |       0.1162 |                      482.58  |                 37.7236 |                 482.344 |                 37.2805 |        1.7915 |
-|      19 | 0.0148 |        0.0425 |       0.078  |       0.1004 |       0.113  |                      448.504 |                157.548  |                 446.845 |                173.68   |        1.8042 |
-|      20 | 0.0144 |        0.0424 |       0.072  |       0.0946 |       0.0993 |                      500.781 |                 81.4317 |                 490.024 |                 85.8991 |        1.7812 |
-|      21 | 0.0142 |        0.0462 |       0.0669 |       0.0983 |       0.0982 |                      483.057 |                134.258  |                 473.958 |                121.551  |        1.8255 |
-|      22 | 0.0149 |        0.0432 |       0.0671 |       0.0964 |       0.1004 |                      510.518 |                162.651  |                 504.159 |                164.953  |        1.8021 |
-|      23 | 0.0182 |        0.05   |       0.074  |       0.101  |       0.116  |                      388.014 |               -223.932  |                 393.797 |               -229.571  |        2.6297 |
-|      24 | 0.0188 |        0.0486 |       0.0774 |       0.1049 |       0.1064 |                      477.557 |                -11.9395 |                 494.446 |                  7.5967 |        0.8668 |
-|      25 | 0.0229 |        0.0608 |       0.0867 |       0.1211 |       0.1507 |                      348.804 |                  5.3412 |                 341.955 |                 30.8778 |        1.5065 |
-|      26 | 0.019  |        0.0544 |       0.0814 |       0.1063 |       0.119  |                      467.791 |                170.149  |                 466.67  |                186.732  |        1.8434 |
-|      27 | 0.0154 |        0.047  |       0.0734 |       0.1014 |       0.1102 |                      426.202 |                -78.8968 |                 417.572 |                -78.867  |        1.8177 |
-|      28 | 0.0159 |        0.0455 |       0.0724 |       0.0983 |       0.1051 |                      523.8   |                165.693  |                 512.567 |                150.064  |        1.7851 |
-|      29 | 0.0243 |        0.0498 |       0.0873 |       0.112  |       0.1309 |                      481.491 |                 55.202  |                 483.593 |                 59.5569 |        1.7285 |
-|      30 | 0.021  |        0.054  |       0.0808 |       0.1097 |       0.1232 |                      508.089 |                200.01   |                 496.295 |                194.816  |        1.7602 |
-|      31 | 0.0186 |        0.0479 |       0.0771 |       0.1047 |       0.1351 |                      422.298 |                 80.0045 |                 421.175 |                 97.6633 |        1.532  |
-|      32 | 0.0205 |        0.0589 |       0.0793 |       0.1129 |       0.1308 |                      395.582 |                -12.36   |                 400.106 |                  6.3091 |        1.5378 |
-|      33 | 0.0129 |        0.0396 |       0.0679 |       0.0923 |       0.0953 |                      431.082 |                  7.8286 |                 428.801 |                  8.6182 |        1.8789 |
-|      34 | 0.0144 |        0.0412 |       0.0662 |       0.0893 |       0.0979 |                      530.599 |                179.193  |                 532.033 |                158.92   |        1.8429 |
-|      35 | 0.0139 |        0.0424 |       0.0716 |       0.0945 |       0.1006 |                      430.982 |                  7.3476 |                 428.805 |                 -4.3425 |        1.711  |
-|      36 | 0.0167 |        0.043  |       0.0702 |       0.0975 |       0.1217 |                      381.859 |                -45.0215 |                 376.432 |                -65.0582 |        1.4227 |
-|      37 | 0.021  |        0.0516 |       0.0772 |       0.1106 |       0.1302 |                      348.402 |                -84.0741 |                 347.672 |                -69.1513 |        1.5184 |
-|      38 | 0.029  |        0.0585 |       0.0895 |       0.1188 |       0.1347 |                      596.764 |                287.068  |                 586.433 |                236.509  |        1.6109 |
-|      39 | 0.0176 |        0.0472 |       0.0758 |       0.1006 |       0.1115 |                      470.259 |                 25.2451 |                 468.965 |                 38.1292 |        1.7815 |
-|      40 | 0.0309 |        0.0583 |       0.0827 |       0.1163 |       0.1649 |                      579.514 |                186.451  |                 587.644 |                177.782  |        1.6365 |
-|      41 | 0.0188 |        0.0516 |       0.0776 |       0.1084 |       0.1369 |                      349.04  |               -106.107  |                 341.44  |                -94.3054 |        1.4013 |
-|      42 | 0.014  |        0.0424 |       0.0673 |       0.0964 |       0.0977 |                      477.916 |                120.4    |                 474.075 |                116.718  |        1.8973 |
-|      43 | 0.0171 |        0.0476 |       0.071  |       0.1054 |       0.1116 |                      423.233 |                 50.4327 |                 420.448 |                 69.2674 |        1.8893 |
-|      44 | 0.0247 |        0.0613 |       0.0799 |       0.1171 |       0.141  |                      426.292 |                 -2.5913 |                 422.69  |                 20.4068 |        1.4871 |
-|      45 | 0.0161 |        0.0431 |       0.0736 |       0.0959 |       0.1007 |                      538.835 |                 71.1159 |                 544.14  |                 89.5933 |        1.7929 |
-|      46 | 0.017  |        0.0442 |       0.0722 |       0.0986 |       0.1175 |                      361.974 |               -136.836  |                 359.692 |               -151.266  |        1.4659 |
-|      47 | 0.0186 |        0.046  |       0.0778 |       0.1076 |       0.1114 |                      502.144 |                 80.8261 |                 499.45  |                102.07   |        1.9431 |
-[2025-12-01 08:19:42,350][training][INFO] - R2 score for lift: 0.9824
-[2025-12-01 08:19:42,350][training][INFO] - R2 score for drag: 0.9904
-[2025-12-01 08:19:42,351][training][INFO] - Summary:
-| Batch   |   Loss |   L2 Pressure |   L2 Shear X |   L2 Shear Y |   L2 Shear Z |   Predicted Drag Coefficient |   Pred Lift Coefficient |   True Drag Coefficient |   True Lift Coefficient |   Elapsed (s) |
-|---------|--------|---------------|--------------|--------------|--------------|------------------------------|-------------------------|-------------------------|-------------------------|---------------|
-| Mean    | 0.0191 |        0.0496 |       0.0775 |       0.1047 |       0.1191 |                      456.371 |                 51.6484 |                 453.193 |                  53.624 |        1.8114 |
-```
-
-  <!-- Alternatively, the model can be used
-directly on `.vtp` or `.stl` files as shown in `inference_on_vtp.py`.  Note that the
-script contains several parameters from the DrivaerML dataset as hardcoded variable
-names: `CpMeanTrim`, `pMeanTrim`, `wallShearStressMeanTrim`, which are used to
-compute the L2 metrics on the inference outputs. -->
-
-<!-- In `inference_on_zarr.py`, the dataset examples are downsampled and preprocessed
-exactly as in the training script.  In `inference_on_vtp.py`, however, the entire
-mesh is processed.  To enable the mesh to fit into GPU memory, the mesh is chunked
-into pieces that are then processed, and recombined to form the prediction on the
-entire mesh.  The outputs are then saved to .vtp files for downstream analysis. -->
-
-## Transolver++
-
-Transolver++ is supported with the `plus` flag to the model.  In
-our experiments, we did not see gains, but you are welcome to try it and share
-your results with us on GitHub!
diff --git a/examples/cfd/external_aerodynamics/typhon/requirements.txt b/examples/cfd/external_aerodynamics/typhon/requirements.txt
deleted file mode 100644
index ffc351ec7b..0000000000
--- a/examples/cfd/external_aerodynamics/typhon/requirements.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-hydra-core
-tabulate
-tensorboard
-termcolor
-torchinfo
-einops
-transformer_engine[pytorch]
-tensorstore
-zarr>=3.0
diff --git a/examples/cfd/external_aerodynamics/typhon/src/benchmark_dataloading.py b/examples/cfd/external_aerodynamics/typhon/src/benchmark_dataloading.py
deleted file mode 100644
index fe58c3a240..0000000000
--- a/examples/cfd/external_aerodynamics/typhon/src/benchmark_dataloading.py
+++ /dev/null
@@ -1,166 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-This is a standalone script for benchmarking and testing the Transolver
-datapipe in surface or volume mode.
-"""
-
-from pathlib import Path
-
-import time
-import os
-import re
-import torch
-
-import numpy as np
-
-from typing import Literal, Any
-
-
-import hydra
-from omegaconf import DictConfig, OmegaConf
-
-
-import torch.distributed as dist
-from torch.utils.data.distributed import DistributedSampler
-
-
-from physicsnemo.distributed import DistributedManager
-from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
-
-from physicsnemo.datapipes.cae.transolver_datapipe import (
-    create_transolver_dataset,
-)
-
-
-from physicsnemo.utils.profiling import profile, Profiler
-
-
-@profile
-def main(cfg: DictConfig):
-    """Main training function
-
-    Args:
-        cfg: Hydra configuration object
-    """
-
-    DistributedManager.initialize()
-
-    # Set up distributed training
-    dist_manager = DistributedManager()
-
-    # Set up logging
-    logger = RankZeroLoggingWrapper(PythonLogger(name="training"), dist_manager)
-
-    logger.info(f"Config:\n{OmegaConf.to_yaml(cfg, resolve=True)}")
-
-    # Load the normalization file:
-    norm_dir = getattr(cfg.data, "normalization_dir", ".")
-    if cfg.data.mode == "surface":
-        norm_file = str(Path(norm_dir) / "surface_fields_normalization.npz")
-    elif cfg.data.mode == "volume":
-        norm_file = str(Path(norm_dir) / "volume_fields_normalization.npz")
-
-    norm_data = np.load(norm_file)
-    norm_factors = {
-        "mean": torch.from_numpy(norm_data["mean"]).to(dist_manager.device),
-        "std": torch.from_numpy(norm_data["std"]).to(dist_manager.device),
-    }
-    # Training dataset
-
-    train_dataloader = create_transolver_dataset(
-        cfg.data,
-        phase="train",
-        scaling_factors=norm_factors,
-    )
-
-    # Validation dataset
-
-    val_dataloader = create_transolver_dataset(
-        cfg.data,
-        phase="val",
-        scaling_factors=norm_factors,
-    )
-
-    num_replicas = dist_manager.world_size
-    data_rank = dist_manager.rank
-
-    # Set up distributed samplers
-    train_sampler = torch.utils.data.distributed.DistributedSampler(
-        train_dataloader,
-        num_replicas=num_replicas,
-        rank=data_rank,
-        shuffle=True,
-        drop_last=True,
-    )
-
-    val_sampler = torch.utils.data.distributed.DistributedSampler(
-        val_dataloader,
-        num_replicas=num_replicas,
-        rank=data_rank,
-        shuffle=False,  # No shuffling for validation
-        drop_last=True,
-    )
-
-    # Training loop
-    logger.info("Starting IO benchmark...")
-    for epoch in range(1):
-        # Set the epoch in the samplers
-        train_sampler.set_epoch(epoch)
-        val_sampler.set_epoch(epoch)
-        train_dataloader.dataset.set_indices(list(train_sampler))
-        val_dataloader.dataset.set_indices(list(val_sampler))
-
-        start_time = time.time()
-        # Training phase
-        start = time.time()
-        with Profiler():
-            for i_batch, data in enumerate(train_dataloader):
-                print(f"Train {i_batch} elapsed time: {time.time() - start}")
-                start = time.time()
-
-        end_time = time.time()
-        train_duration = end_time - start_time
-
-        # Log epoch results
-        logger.info(
-            f"Epoch [{epoch}/{cfg.training.num_epochs}] [duration: {train_duration:.2f}s]"
-        )
-
-    logger.info("Benchmark completed!")
-
-
-@hydra.main(version_base=None, config_path="conf", config_name="train_surface")
-def launch(cfg: DictConfig):
-    """Launch training with hydra configuration
-
-    Args:
-        cfg: Hydra configuration object
-    """
-
-    # If you want to use `line_profiler` or PyTorch's profiler, enable them here.
-
-    profiler = Profiler()
-    if cfg.profile:
-        profiler.enable("torch")
-    profiler.initialize()
-    main(cfg)
-    profiler.finalize()
-
-
-if __name__ == "__main__":
-    launch()
diff --git a/examples/cfd/external_aerodynamics/typhon/src/compute_normalizations.py b/examples/cfd/external_aerodynamics/typhon/src/compute_normalizations.py
deleted file mode 100644
index c75fdc9af7..0000000000
--- a/examples/cfd/external_aerodynamics/typhon/src/compute_normalizations.py
+++ /dev/null
@@ -1,159 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: MIT License
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-"""
-This file provides utilities to compute normalization statistics (mean, std, min, max)
-for a given field in a dataset, typically used for preprocessing in CFD workflows.
-"""
-
-from pathlib import Path
-import time
-
-import numpy as np
-import torch
-import hydra
-from omegaconf import DictConfig
-
-from physicsnemo.datapipes.cae.cae_dataset import CAEDataset
-
-
-def compute_mean_std_min_max(
-    dataset: CAEDataset,
-    field_key: str,
-    max_samples: int = 100,
-) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-    """
-    Compute the mean, standard deviation, minimum, and maximum for a specified field
-    across all samples in a dataset.
-
-    Uses a numerically stable online algorithm for mean and variance.
-
-    Args:
-        dataset (CAEDataset): The dataset to process.
-        field_key (str): The key for the field to normalize.
-
-    Returns:
-        tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-            mean, std, min, max tensors for the field.
-    """
-    N = torch.tensor(
-        0, dtype=torch.int64, device="cpu"
-    )  # Total number of elements processed
-    mean = None
-    M2 = None  # Sum of squares of differences from the current mean
-    min_val = None
-    max_val = None
-
-    time_start = time.time()
-    for i in range(len(dataset)):
-        print(f"reading file: {i}")
-        data = dataset[i][field_key]
-        if mean is None:
-            # Initialize accumulators based on the shape of the data
-            mean = torch.zeros(data.shape[-1], device=data.device)
-            M2 = torch.zeros(data.shape[-1], device=data.device)
-            min_val = torch.full((data.shape[-1],), float("inf"), device=data.device)
-            max_val = torch.full((data.shape[-1],), float("-inf"), device=data.device)
-        n = data.shape[0]
-        N += n
-
-        # Compute batch statistics
-        batch_mean = data.mean(axis=(0,))
-        batch_M2 = ((data - batch_mean) ** 2).sum(axis=(0,))
-        batch_n = data.shape[0]
-
-        # Update min/max
-        batch_min = data.amin(dim=(0,))
-        batch_max = data.amax(dim=(0,))
-        min_val = torch.minimum(min_val, batch_min)
-        max_val = torch.maximum(max_val, batch_max)
-
-        # Update running mean and M2 (Welford's algorithm)
-        delta = batch_mean - mean
-        N += batch_n
-        mean = mean + delta * (batch_n / N)
-        M2 = M2 + batch_M2 + delta**2 * (batch_n * N) / N
-        time_end = time.time()
-        print(f"Time taken for file {i}: {time_end - time_start:.2f} seconds")
-        time_start = time.time()
-        if i >= max_samples:
-            break
-
-    var = M2 / (N - 1)
-    std = torch.sqrt(var)
-    return mean, std, min_val, max_val
-
-
-@hydra.main(version_base="1.3", config_path="conf", config_name="train_surface")
-def main(cfg: DictConfig) -> None:
-    """
-    Script entry point for computing normalization statistics for a specified field
-    in a dataset, using configuration from a YAML file.
-
-    The computed statistics are printed and saved to a .npz file.
-    """
-
-    # Choose which field to normalize (can be overridden via command line)
-    field_key: str = cfg.datapipe.mode + "_fields"
-
-    # Normalization directory can be configured (backward compatible: defaults to current directory)
-    normalization_dir: str = getattr(cfg.datapipe, "normalization_dir", ".")
-
-    # Construct full path using pathlib (cross-platform, concise)
-    workspace_path: str = str(
-        Path(normalization_dir) / f"{field_key}_normalization.npz"
-    )
-
-    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
-
-    # Create the dataset using configuration parameters
-    dataset = CAEDataset(
-        data_dir=cfg.datapipe.train.data_path,
-        keys_to_read=[
-            field_key,
-        ],
-        keys_to_read_if_available={},
-        output_device=device,
-        preload_depth=cfg.datapipe.preload_depth,
-        pin_memory=cfg.datapipe.pin_memory,
-    )
-    # Compute normalization statistics
-    mean, std, min_val, max_val = compute_mean_std_min_max(dataset, field_key, 100)
-    print(f"Mean for {field_key}: {mean}")
-    print(f"Std for {field_key}: {std}")
-    print(f"Min for {field_key}: {min_val}")
-    print(f"Max for {field_key}: {max_val}")
-
-    # Save statistics to configured workspace path
-    print(f"Saving normalization statistics to: {workspace_path}")
-    np.savez(
-        workspace_path,
-        mean=mean.cpu().numpy(),
-        std=std.cpu().numpy(),
-        min=min_val.cpu().numpy(),
-        max=max_val.cpu().numpy(),
-    )
-    print(f"Successfully saved normalization file: {workspace_path}")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/combined.yaml b/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/combined.yaml
deleted file mode 100644
index e4bcbd16c1..0000000000
--- a/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/combined.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# You may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-defaults:
-  -  core
-
-# Overrides for combined data:
-mode: combined
-
-# combined-speficic needs:
-data_keys:
-  - "volume_fields"
-  - "volume_mesh_centers"
-  - "surface_fields"
-  - "surface_mesh_centers"
-  - "surface_normals"
-  - "surface_areas"
-  - "air_density"
-  - "stream_velocity"
-  - "stl_faces"
-  - "stl_centers"
-  - "stl_coordinates"
\ No newline at end of file
diff --git a/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/volume.yaml b/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/volume.yaml
deleted file mode 100644
index b222fda5f9..0000000000
--- a/examples/cfd/external_aerodynamics/typhon/src/conf/datapipe/volume.yaml
+++ /dev/null
@@ -1,31 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# You may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-defaults:
-  -  core
-
-# Overrides for volume data:
-mode: volume
-
-# volume-speficic needs:
-data_keys:
-  - "volume_fields"
-  - "volume_mesh_centers"
-  - "air_density"
-  - "stream_velocity"
-  - "stl_faces"
-  - "stl_centers"
-  - "stl_coordinates"
\ No newline at end of file
diff --git a/examples/cfd/external_aerodynamics/typhon/src/conf/model/transolver.yaml b/examples/cfd/external_aerodynamics/typhon/src/conf/model/transolver.yaml
deleted file mode 100644
index c43fb8560c..0000000000
--- a/examples/cfd/external_aerodynamics/typhon/src/conf/model/transolver.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-_target_: physicsnemo.models.transolver.Transolver
-functional_dim: 2
-out_dim: 4
-embedding_dim: 6
-n_layers: 8
-n_hidden: 256
-dropout: 0.0
-n_head: 8
-act: "gelu"
-mlp_ratio: 2
-slice_num: 512
-unified_pos: false
-ref: 8
-structured_shape: null
-use_te: false
-time_input: false
-plus: false
-
diff --git a/examples/cfd/external_aerodynamics/typhon/src/conf/training/base.yaml b/examples/cfd/external_aerodynamics/typhon/src/conf/training/base.yaml
deleted file mode 100644
index 18797ea051..0000000000
--- a/examples/cfd/external_aerodynamics/typhon/src/conf/training/base.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-num_epochs: 501
-save_interval: 25
-
-scheduler:
-  name: "StepLR"
-  params:
-    step_size: 100
-    gamma: 0.5
-
-optimizer:
-  _target_: torch.optim.AdamW
-  lr: 1.0e-3
-  weight_decay: 1.0e-4
-  betas: [0.9, 0.999]
-  eps: 1.0e-8
-
diff --git a/examples/cfd/external_aerodynamics/typhon/src/metrics.py b/examples/cfd/external_aerodynamics/typhon/src/metrics.py
deleted file mode 100644
index 34dbb74aff..0000000000
--- a/examples/cfd/external_aerodynamics/typhon/src/metrics.py
+++ /dev/null
@@ -1,164 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-import torch.distributed as dist
-from physicsnemo.distributed import ShardTensor
-from physicsnemo.distributed import DistributedManager
-
-from utils import tensorwise
-
-
-def all_reduce_dict(
-    metrics: dict[str, torch.Tensor], dm: DistributedManager
-) -> dict[str, torch.Tensor]:
-    """
-    Reduces a dictionary of metrics across all distributed processes.
-
-    Args:
-        metrics: Dictionary of metric names to torch.Tensor values.
-        dm: DistributedManager instance for distributed context.
-
-    Returns:
-        Dictionary of reduced metrics.
-    """
-    # TODO - update this to use domains and not the full world
-
-    if dm.world_size == 1:
-        return metrics
-
-    # Pack the metrics together:
-    merged_metrics = torch.stack(list(metrics.values()), dim=-1)
-
-    dist.all_reduce(merged_metrics)
-    merged_metrics = merged_metrics / dm.world_size
-
-    # Unstack metrics:
-    metrics = {key: merged_metrics[i] for i, key in enumerate(metrics.keys())}
-    return metrics
-
-
-@tensorwise
-def metrics_fn(
-    pred: torch.Tensor,
-    target: torch.Tensor,
-    dm: DistributedManager,
-    mode: str,
-) -> dict[str, torch.Tensor]:
-    """
-    Computes metrics for either surface or volume data.
-
-    Args:
-        pred: Predicted values (unnormalized).
-        target: Target values (unnormalized).
-        others: Dictionary containing normalization statistics.
-        dm: DistributedManager instance for distributed context.
-        mode: Either "surface" or "volume".
-
-    Returns:
-        Dictionary of computed metrics.
-    """
-    with torch.no_grad():
-        if mode == "surface":
-            metrics = metrics_fn_surface(pred, target, dm)
-        elif mode == "volume":
-            metrics = metrics_fn_volume(pred, target, dm)
-        else:
-            raise ValueError(f"Unknown data mode: {mode}")
-
-        metrics = all_reduce_dict(metrics, dm)
-        return metrics
-
-
-def metrics_fn_volume(
-    pred: torch.Tensor,
-    target: torch.Tensor,
-    dm: DistributedManager,
-) -> dict[str, torch.Tensor]:
-    """
-    Placeholder for volume metrics computation.
-
-    Args:
-        pred: Predicted values.
-        target: Target values.
-        others: Dictionary containing additional statistics.
-        dm: DistributedManager instance for distributed context.
-        norm_factors: Dictionary of normalization factors.
-
-    Raises:
-        NotImplementedError: Always, as this function is not yet implemented.
-    """
-    l2_num = (pred - target) ** 2
-    l2_num = torch.sum(l2_num, dim=1)
-    l2_num = torch.sqrt(l2_num)
-
-    l2_denom = target**2
-    l2_denom = torch.sum(l2_denom, dim=1)
-    l2_denom = torch.sqrt(l2_denom)
-
-    l2 = l2_num / l2_denom
-
-    metrics = {
-        "l2_pressure_vol": torch.mean(l2[:, 3]),
-        "l2_velocity_x": torch.mean(l2[:, 0]),
-        "l2_velocity_y": torch.mean(l2[:, 1]),
-        "l2_velocity_z": torch.mean(l2[:, 2]),
-        "l2_nut": torch.mean(l2[:, 4]),
-    }
-
-    return metrics
-
-
-def metrics_fn_surface(
-    pred: torch.Tensor,
-    target: torch.Tensor,
-    dm: DistributedManager,
-) -> dict[str, torch.Tensor]:
-    """
-    Computes L2 surface metrics between prediction and target.
-
-    Args:
-        pred: Predicted values (normalized).
-        target: Target values (normalized).
-        others: Dictionary containing normalization statistics.
-        dm: DistributedManager instance for distributed context.
-        norm_factors: Dictionary with 'mean' and 'std' for unnormalization.
-
-    Returns:
-        Dictionary of L2 surface metrics for pressure and shear components.
-    """
-    # Unnormalize the surface values for L2:
-    # target = target * norm_factors["std"] + norm_factors["mean"]
-    # pred = pred * norm_factors["std"] + norm_factors["mean"]
-
-    l2_num = (pred - target) ** 2
-    l2_num = torch.sum(l2_num, dim=1)
-    l2_num = torch.sqrt(l2_num)
-
-    l2_denom = target**2
-    l2_denom = torch.sum(l2_denom, dim=1)
-    l2_denom = torch.sqrt(l2_denom)
-
-    l2 = l2_num / l2_denom
-
-    metrics = {
-        "l2_pressure_surf": torch.mean(l2[:, 0]),
-        "l2_shear_x": torch.mean(l2[:, 1]),
-        "l2_shear_y": torch.mean(l2[:, 2]),
-        "l2_shear_z": torch.mean(l2[:, 3]),
-    }
-
-    return metrics
diff --git a/examples/cfd/external_aerodynamics/typhon/src/preprocess.py b/examples/cfd/external_aerodynamics/typhon/src/preprocess.py
deleted file mode 100644
index b85ff4dff6..0000000000
--- a/examples/cfd/external_aerodynamics/typhon/src/preprocess.py
+++ /dev/null
@@ -1,121 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-
-from physicsnemo.distributed.shard_tensor import ShardTensor
-from physicsnemo.utils.profiling import profile
-
-
-@profile
-def preprocess_surface_data(
-    batch: dict,
-    norm_factors: dict[str, torch.Tensor],
-) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
-    """
-    Preprocess the surface data.  The functional input
-    is the air density and stream velocity.  The embeddings
-    are the surface mesh centers and normals.  The targets are
-    normalized to mean of 0, std 1.  We cache the mean and std
-    to de-normalize when computing the metrics.
-    """
-
-    mesh_centers = batch["surface_mesh_centers"]
-    normals = batch["surface_normals"]
-    targets = batch["surface_fields"]
-    node_features = torch.stack(
-        [batch["air_density"], batch["stream_velocity"]], dim=-1
-    ).to(torch.float32)
-
-    # Normalize the surface fields:
-    targets = (targets - norm_factors["mean"]) / norm_factors["std"]
-
-    # If you want to use this, be sure to update the
-    # functional_dim value in your configuration
-
-    # fourier_sin_features = [
-    #     torch.sin(mesh_centers * (2 ** i) * torch.pi)
-    #     for i in range(4)
-    # ]
-    # fourier_cos_features = [
-    #     torch.cos(mesh_centers * (2 ** i) * torch.pi)
-    #     for i in range(4)
-    # ]
-
-    # Calculate center of mass
-    sizes = batch["stl_areas"]
-    centers = batch["stl_centers"]
-
-    total_weighted_position = torch.einsum("ki,kij->kj", sizes, centers)
-    total_size = torch.sum(sizes)
-    center_of_mass = total_weighted_position[None, ...] / total_size
-
-    # Subtract the COM from the centers:
-    mesh_centers = mesh_centers - center_of_mass
-
-    embeddings = torch.cat(
-        [
-            mesh_centers,
-            normals,
-            # *fourier_sin_features,
-            # *fourier_cos_features
-        ],
-        dim=-1,
-    )
-
-    others = {
-        "surface_areas": sizes,
-        "surface_normals": normals,
-        "stream_velocity": batch["stream_velocity"],
-        "air_density": batch["air_density"],
-    }
-
-    return node_features, embeddings, targets, others
-
-
-@profile
-def downsample_surface(
-    features: torch.Tensor,
-    embeddings: torch.Tensor,
-    targets: torch.Tensor,
-    num_keep=1024,
-):
-    if num_keep == -1:
-        features = features.unsqueeze(1).expand(1, embeddings.shape[1], -1)
-        return features, embeddings, targets
-
-    """
-    Downsample the surface data. We generate one set of indices, and
-    use it to sample the same points from the features, embeddings,
-    and targets.  Using torch.multinomial to sample without replacement.
-    """
-
-    num_samples = embeddings.shape[1]
-    # Generate random indices to keep (faster for large num_samples)
-    indices = torch.multinomial(
-        torch.ones(num_samples, device=features.device), num_keep, replacement=False
-    )
-
-    # Use the same indices to downsample all tensors
-    downsampled_embeddings = embeddings[:, indices]
-    downsampled_targets = targets[:, indices]
-    # This unsqueezes the features (air density and stream velocity) to
-    # the same shape as the embeddings
-    downsampled_features = features.unsqueeze(1).expand(
-        1, downsampled_embeddings.shape[1], -1
-    )
-
-    return downsampled_features, downsampled_embeddings, downsampled_targets
diff --git a/examples/cfd/external_aerodynamics/typhon/src/surface_fields_normalization.npz b/examples/cfd/external_aerodynamics/typhon/src/surface_fields_normalization.npz
deleted file mode 100644
index 228f7550cc4be6993352c81bb56def3b95cd036d..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1040
zcmWIWW@gc4fB;2?g_0Tf|3d)>g9t-zYGR&VUO^=zg8;(>s45se*)P;LAd-=xjG<aR
zCAB!YNZm?7-6qXMT}MGZEx)LwC^0WSzbGXYB<_}&Q=AGEFV09TNComWOmsAL6lxVH
z09=Roy!SB~b?nlMSz)U8rEW(6iceN^A6N<V2|Lgy#U&|&)gzKGP3_`sH(8yC%CItW
zw6{X_$iMk>cO!cwH*?U&NPn&AAsLs+dnfT%@AXgrv=`MQTbRVJqIe{+Vz9<Yf1|d8
zYF@Z)my?|Bj|q=#1H2iTbeU1J2`C$YFb6bSfT#vW5DCjP=(<2j5~hnCq!0=k7*7FN
w&;*LE2^9U<HBAOu22Bj;nm|E~T~iDbP!lBJ1H4(;KrUbb!mB`9l^MhX05f;!Pyhe`

diff --git a/examples/cfd/external_aerodynamics/typhon/src/train.py b/examples/cfd/external_aerodynamics/typhon/src/train.py
deleted file mode 100644
index 5fa1c82775..0000000000
--- a/examples/cfd/external_aerodynamics/typhon/src/train.py
+++ /dev/null
@@ -1,900 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Core python imports:
-import os
-import time
-from pathlib import Path
-from typing import Literal, Any, Callable, Sequence
-import collections
-from contextlib import nullcontext
-
-from collections.abc import Sequence
-
-# Configuration:
-import hydra
-import omegaconf
-from omegaconf import DictConfig
-
-# Pytorch imports:
-import torch
-from torch.optim import Optimizer
-from torch.amp import autocast, GradScaler
-from torch.utils.tensorboard import SummaryWriter
-
-import torch.distributed as dist
-
-# For metrics and model printouts:
-from tabulate import tabulate
-import torchinfo
-
-# For loading dataset stats:
-import numpy as np
-
-# Physicsnemo imports ...
-from physicsnemo.launch.utils import load_checkpoint, save_checkpoint
-from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
-from physicsnemo.distributed import DistributedManager
-from physicsnemo.utils.profiling import profile, Profiler
-from physicsnemo.datapipes.cae.transolver_datapipe import (
-    create_transolver_dataset,
-    TransolverDataPipe,
-)
-
-# Local folder imports for this example
-from metrics import metrics_fn
-from preprocess import (
-    preprocess_surface_data,
-    downsample_surface,
-)
-
-# tensorwise is to handle single-point-cloud or multi-point-cloud running.
-# it's a decorator that will automatically unzip one or more of a list of tensors,
-# run the funtcion, and rezip the results.
-from utils import tensorwise
-
-# Special import, if transformer engine is available:
-from physicsnemo.utils.version_check import check_min_version
-
-TE_AVAILABLE = check_min_version("transformer_engine", "0.0.0", hard_fail=False)
-
-if TE_AVAILABLE:
-    import transformer_engine.pytorch as te
-    from transformer_engine.common.recipe import Format, DelayedScaling
-else:
-    te, Format, DelayedScaling = None, None, None
-
-# This will go away when checkpointing is refined further below:
-torch.serialization.add_safe_globals([omegaconf.listconfig.ListConfig])
-torch.serialization.add_safe_globals([omegaconf.base.ContainerMetadata])
-torch.serialization.add_safe_globals([Any])
-torch.serialization.add_safe_globals([list])
-torch.serialization.add_safe_globals([collections.defaultdict])
-torch.serialization.add_safe_globals([dict])
-torch.serialization.add_safe_globals([int])
-torch.serialization.add_safe_globals([omegaconf.nodes.AnyNode])
-torch.serialization.add_safe_globals([omegaconf.base.Metadata])
-
-
-class CombinedOptimizer(Optimizer):
-    """Combine multiple PyTorch optimizers into a single Optimizer-like interface.
-
-    The wrapper concatenates the *param_groups* from all contained optimizers so
-    that learning-rate schedulers (e.g., ReduceLROnPlateau, CosineAnnealingLR)
-    operate transparently across every parameter. Only a minimal subset of the
-    *torch.optim.Optimizer* API is implemented—extend as needed.
-
-    Note:
-        This will get upstreamed to physicsnemo shortly.  Don't count on this
-        class existing here in the future!
-
-        In other words, this is already marked for deprecation!
-    """
-
-    def __init__(
-        self,
-        optimizers: Sequence[Optimizer],
-        torch_compile_kwargs: dict[str, Any] | None = None,
-    ):
-        if not optimizers:
-            raise ValueError("`optimizers` must contain at least one optimizer.")
-
-        self.optimizers = optimizers
-
-        # Collect parameter groups from all optimizers. We pass an empty
-        # *defaults* dict because hyper-parameters are managed by the inner
-        # optimizers, not this wrapper.
-        param_groups = [g for opt in optimizers for g in opt.param_groups]
-        super().__init__(param_groups, defaults={})
-
-        if torch_compile_kwargs is None:
-            self.step_fns: list[Callable] = [opt.step for opt in optimizers]
-        else:
-            self.step_fns: list[Callable] = [
-                torch.compile(opt.step, **torch_compile_kwargs) for opt in optimizers
-            ]
-
-    def zero_grad(self, *args, **kwargs) -> None:
-        """Nullify gradients"""
-        for opt in self.optimizers:
-            opt.zero_grad(*args, **kwargs)
-
-    def step(self, closure=None) -> None:
-        for step_fn in self.step_fns:
-            if closure is None:
-                step_fn()
-            else:
-                step_fn(closure)
-
-    def state_dict(self):
-        return {"optimizers": [opt.state_dict() for opt in self.optimizers]}
-
-    def load_state_dict(self, state_dict):
-        for opt, sd in zip(self.optimizers, state_dict["optimizers"]):
-            opt.load_state_dict(sd)
-
-        self.param_groups = [g for opt in self.optimizers for g in opt.param_groups]
-
-
-def get_autocast_context(precision: str) -> nullcontext:
-    """
-    Returns the appropriate autocast context for mixed precision training.
-
-    Args:
-        precision (str): The desired precision. Supported values are "float16", "bfloat16", or any other string for no autocast.
-
-    Returns:
-        Context manager: An autocast context for the specified precision, or a nullcontext if precision is not recognized.
-    """
-    if precision == "float16":
-        return autocast("cuda", dtype=torch.float16)
-    elif precision == "bfloat16":
-        return autocast("cuda", dtype=torch.bfloat16)
-    elif precision == "float8" and TE_AVAILABLE:
-        fp8_format = Format.HYBRID
-        fp8_recipe = DelayedScaling(
-            fp8_format=fp8_format, amax_history_len=16, amax_compute_algo="max"
-        )
-        return te.fp8_autocast(enabled=True, fp8_recipe=fp8_recipe)
-    else:
-        return nullcontext()
-
-
-@tensorwise
-def cast_precisions(tensor: torch.Tensor, precision: str) -> torch.Tensor:
-    """
-    Casts the tensors to the specified precision.
-
-    We are careful to take either a tensor or list of tensors, and return the same format.
-    """
-
-    match precision:
-        case "float16":
-            dtype = torch.float16
-        case "bfloat16":
-            dtype = torch.bfloat16
-        case _:
-            dtype = None
-
-    if dtype is not None:
-        return tensor.to(dtype)
-    else:
-        return tensor
-
-
-@tensorwise
-def pad_input_for_fp8(
-    features: torch.Tensor,
-    embeddings: torch.Tensor,
-    geometry: torch.Tensor | None = None,
-) -> torch.Tensor:
-    """
-    Pads the input features tensor so that the concatenated feature and embedding dimension is a multiple of 16,
-    which is required for FP8 operations.  Only the features is updated.
-
-    Args:
-        features (torch.Tensor): The input features tensor of shape (..., feature_dim).
-        embeddings (torch.Tensor): The embeddings tensor of shape (..., embedding_dim).
-
-    Returns:
-        torch.Tensor: The padded features tensor, so that (features.shape[-1] + embeddings.shape[-1]) is a multiple of 16.
-    """
-    fx_dim = features.shape[-1] + embeddings.shape[-1]
-    if fx_dim % 16 != 0:
-        pad_size = 16 - (fx_dim % 16)
-        features = torch.nn.functional.pad(features, (0, pad_size))
-        fx_dim = features.shape[-1] + embeddings.shape[-1]
-
-    if geometry is not None:
-        geometry_dim = geometry.shape[-1] if geometry is not None else 0
-        if geometry_dim % 16 != 0:
-            pad_size = 16 - (geometry_dim % 16)
-            geometry = torch.nn.functional.pad(geometry, (0, pad_size))
-            geometry_dim = geometry.shape[-1]
-
-    return features, geometry
-
-
-@tensorwise
-def unpad_output_for_fp8(
-    outputs: torch.Tensor, output_pad_size: int | None
-) -> torch.Tensor:
-    """
-    Removes the padding from the output tensor that was added for FP8 compatibility.
-
-    Args:
-        outputs (torch.Tensor): The output tensor of shape (..., output_dim + pad_size) if padded.
-        output_pad_size (int | None): The number of padded elements to remove from the last dimension. If None, no unpadding is performed.
-
-    Returns:
-        torch.Tensor: The unpadded output tensor.
-    """
-    # Remove the padded outputs:
-    if output_pad_size is not None:
-        return outputs[:, :, :-output_pad_size]
-    return outputs
-
-
-@tensorwise
-def loss_fn(outputs: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
-    """
-    Compute the loss for the model.
-    """
-    return torch.nn.functional.mse_loss(outputs, targets)
-
-
-def forward_pass(
-    batch: dict,
-    model: torch.nn.Module,
-    precision: str,
-    output_pad_size: int | None,
-    dist_manager: DistributedManager,
-    data_mode: Literal["surface", "volume"],
-    datapipe: TransolverDataPipe,
-):
-    """
-    Run the forward pass of the model for one batch, including metrics and loss calculation.
-
-    Transolver takes just one tensor for features, embeddings.
-    Typhon takes a  list of tensors, for each.
-
-    Typhon needs a `geometry` tensor, so that's the switch we use to distinguish.
-
-    """
-
-    features = batch["fx"]
-    embeddings = batch["embeddings"]
-    targets = batch["fields"]
-
-    # Cast precisions:
-    features = cast_precisions(features, precision=precision)
-    embeddings = cast_precisions(embeddings, precision=precision)
-    if "geometry" in batch.keys():
-        geometry = cast_precisions(batch["geometry"], precision=precision)
-    else:
-        geometry = None
-
-    all_metrics = {}
-    if datapipe.config.model_type == "combined":
-        # This is hard coded for Typhon.  If you have more point clouds,
-        # your mileage may vary.
-        modes = ["surface", "volume"]
-    elif datapipe.config.model_type == "surface":
-        modes = [
-            "surface",
-        ]
-    elif datapipe.config.model_type == "volume":
-        modes = [
-            "volume",
-        ]
-
-    with get_autocast_context(precision):
-        # For fp8, we may have to pad the inputs:
-        if precision == "float8" and TE_AVAILABLE:
-            features, geometry = pad_input_for_fp8(features, embeddings, geometry)
-
-        if "geometry" in batch.keys():
-            # This is the Typhon path
-            outputs = model(
-                global_embedding=features, local_embedding=embeddings, geometry=geometry
-            )
-
-            outputs = unpad_output_for_fp8(outputs, output_pad_size)
-            # Loss per point cloud:
-            loss = loss_fn(outputs, targets)
-            # Log them too:
-            for i, mode in enumerate(modes):
-                all_metrics[f"loss/{mode}"] = loss[i]
-            # Averaging over point cloud inputs, instead of summing.
-            full_loss = torch.mean(torch.stack(loss))
-
-        else:
-            # This is the Transolver path
-            outputs = model(fx=features, embedding=embeddings)
-            outputs = unpad_output_for_fp8(outputs, output_pad_size)
-            full_loss = torch.nn.functional.mse_loss(outputs, targets)
-
-            all_metrics[f"loss/{modes[0]}"] = full_loss
-
-    air_density = batch["air_density"] if "air_density" in batch.keys() else None
-    stream_velocity = (
-        batch["stream_velocity"] if "stream_velocity" in batch.keys() else None
-    )
-
-    unscaled_outputs = tensorwise(datapipe.unscale_model_targets)(
-        outputs,
-        air_density=air_density,
-        stream_velocity=stream_velocity,
-        factor_type=modes,
-    )
-    unscaled_targets = tensorwise(datapipe.unscale_model_targets)(
-        targets,
-        air_density=air_density,
-        stream_velocity=stream_velocity,
-        factor_type=modes,
-    )
-    metrics = metrics_fn(unscaled_outputs, unscaled_targets, dist_manager, modes)
-
-    # In the combined mode, this is a list of dicts.  Merge them.
-    metrics = (
-        {k: v for d in metrics for k, v in d.items()}
-        if isinstance(metrics, list)
-        else metrics
-    )
-    all_metrics.update(metrics)
-
-    # if "geometry" in batch.keys():
-    #     print(f"HERE")
-    #     unscaled_outputs = []
-    #     unscaled_targets = []
-    #     for i in range(len(outputs)):
-    #         local_unscaled_outputs = datapipe.unscale_model_targets(
-    #             outputs[i],
-    #             air_density=air_density,
-    #             stream_velocity=stream_velocity,
-    #             factor_type=modes[i],
-    #         )
-    #         local_unscaled_targets = datapipe.unscale_model_targets(
-    #             targets[i],
-    #             air_density=air_density,
-    #             stream_velocity=stream_velocity,
-    #             factor_type=modes[i],
-    #         )
-    #         print(f"local_unscaled_outputs: {local_unscaled_outputs.shape}")
-    #         print(f"local_unscaled_targets: {local_unscaled_targets.shape}")
-    #         metrics = metrics_fn(local_unscaled_outputs, local_unscaled_targets, dist_manager, modes[i])
-    #         print(f"metrics: {metrics}")
-    #         all_metrics.update(metrics)
-    #         unscaled_outputs.append(local_unscaled_outputs)
-    #         unscaled_targets.append(local_unscaled_targets)
-    # else:
-    #     unscaled_outputs = datapipe.unscale_model_targets(
-    #         outputs,
-    #         air_density=air_density,
-    #         stream_velocity=stream_velocity,
-    #         factor_type=modes[0],
-    #     )
-    #     unscaled_targets = datapipe.unscale_model_targets(
-    #         targets,
-    #         air_density=air_density,
-    #         stream_velocity=stream_velocity,
-    #         factor_type=modes[0],
-    #     )
-
-    #     metrics = metrics_fn(unscaled_outputs, unscaled_targets, dist_manager, modes[0])
-    #     all_metrics.update(metrics)
-
-    return full_loss, all_metrics, (unscaled_outputs, unscaled_targets)
-
-
-@profile
-def train_epoch(
-    dataloader,
-    epoch_len: int,
-    model: torch.nn.Module,
-    output_pad_size: int | None,
-    optimizer: torch.optim.Optimizer,
-    scheduler: torch.optim.lr_scheduler._LRScheduler,
-    logger: PythonLogger,
-    writer: SummaryWriter,
-    epoch: int,
-    cfg: DictConfig,
-    dist_manager: DistributedManager,
-    scaler: GradScaler | None = None,
-) -> float:
-    """
-    Train the model for one epoch.
-
-    Args:
-        dataloader: Training data loader
-        model (torch.nn.Module): The neural network model to train.
-        epoch_len (int): Length of the epoch.
-        output_pad_size (int | None): Optional output padding size for lowest precisions (FP8).
-        optimizer (torch.optim.Optimizer): Optimizer for model parameters.
-        scheduler (torch.optim.lr_scheduler._LRScheduler): Learning rate scheduler.
-        logger (PythonLogger): Logger for training progress.
-        writer (SummaryWriter): TensorBoard writer for logging metrics.
-        epoch (int): Current epoch number.
-        cfg (DictConfig): Hydra configuration object.
-        dist_manager (DistributedManager): Distributed manager from physicsnemo.
-        scaler (GradScaler | None, optional): Gradient scaler for mixed precision training.
-    Returns:
-        float: The average training loss for the epoch.
-    """
-    model.train()
-    total_loss = 0
-    total_metrics = {}
-
-    precision = getattr(cfg, "precision", "float32")
-    start_time = time.time()
-
-    for i, batch in enumerate(dataloader):
-        # TransolverX has a different forward pass:
-
-        loss, metrics, _ = forward_pass(
-            batch,
-            model,
-            precision,
-            output_pad_size,
-            dist_manager,
-            cfg.datapipe.mode,
-            dataloader,
-        )
-
-        optimizer.zero_grad()
-        if precision == "float16" and scaler is not None:
-            scaler.scale(loss).backward()
-            scaler.step(optimizer)
-            scaler.update()
-        else:
-            loss.backward()
-            optimizer.step()
-
-        if not isinstance(scheduler, torch.optim.lr_scheduler.StepLR):
-            scheduler.step()
-
-        end_time = time.time()
-
-        # Logging
-        this_loss = loss.detach().item()
-        total_loss += this_loss
-
-        if i == 0:
-            total_metrics = metrics
-        else:
-            total_metrics = {
-                k: total_metrics[k] + metrics[k].item() for k in metrics.keys()
-            }
-
-        duration = end_time - start_time
-        start_time = end_time
-        images_per_second = 1 / duration
-
-        mem_usage = torch.cuda.memory_reserved() / 1024**3
-
-        logger.info(
-            f"Epoch {epoch} [{i}/{epoch_len}] Loss: {this_loss:.6f} Duration: {duration:.2f}s Mem: {mem_usage:.2f}GB"
-        )
-        if dist_manager.rank == 0:
-            writer.add_scalar(
-                "batch/learning_rate",
-                optimizer.param_groups[0]["lr"],
-                i + epoch_len * epoch,
-            )
-            writer.add_scalar("batch/loss", this_loss, i + epoch_len * epoch)
-            writer.add_scalar(
-                "batch/throughpu_per_gpu", images_per_second, i + epoch_len * epoch
-            )
-            for metric_name, metric_value in metrics.items():
-                writer.add_scalar(
-                    f"batch/{metric_name}", metric_value, i + epoch_len * epoch
-                )
-
-        if cfg.profile and i >= 10:
-            break  # Stop profiling after 10 batches
-
-    avg_loss = total_loss / epoch_len
-    avg_metrics = {k: v / epoch_len for k, v in total_metrics.items()}
-    if dist_manager.rank == 0:
-        writer.add_scalar("epoch/loss", avg_loss, epoch)
-        for metric_name, metric_value in avg_metrics.items():
-            writer.add_scalar(f"epoch/{metric_name}", metric_value, epoch)
-        # Print average metrics using tabulate
-        metrics_table = tabulate(
-            [[k, v] for k, v in avg_metrics.items()],
-            headers=["Metric", "Average Value"],
-            tablefmt="pretty",
-        )
-        print(f"\nEpoch {epoch} Average Metrics:\n{metrics_table}\n")
-    return avg_loss
-
-
-@profile
-def val_epoch(
-    dataloader,
-    epoch_len: int,
-    model: torch.nn.Module,
-    output_pad_size: int | None,
-    logger: PythonLogger,
-    val_writer: SummaryWriter,
-    epoch: int,
-    cfg: DictConfig,
-    dist_manager: DistributedManager,
-) -> float:
-    """
-    Run validation for one epoch.
-
-    Args:
-        dataloader: Validation data loader.
-        epoch_len (int): Length of the epoch.
-        model (torch.nn.Module): The model to evaluate.
-        output_pad_size (int | None): Optional output padding size for lowest precisions (FP8).
-        logger (PythonLogger): Logger for validation progress.
-        val_writer (SummaryWriter): TensorBoard writer for logging validation metrics.
-        epoch (int): Current epoch number.
-        cfg (DictConfig): Hydra configuration object.
-        dist_manager (DistributedManager): Distributed manager instance.
-    Returns:
-        float: The average validation loss for the epoch.
-    """
-
-    model.eval()  # Set model to evaluation mode
-    total_loss = 0
-    total_metrics = {}
-
-    precision = getattr(cfg.training, "precision", "float32")
-
-    start_time = time.time()
-    with torch.no_grad():  # Disable gradient computation
-        for i, batch in enumerate(dataloader):
-            loss, metrics, _ = forward_pass(
-                batch,
-                model,
-                precision,
-                output_pad_size,
-                dist_manager,
-                cfg.datapipe.mode,
-                dataloader,
-            )
-
-            if i == 0:
-                total_metrics = metrics
-            else:
-                total_metrics = {
-                    k: total_metrics[k] + metrics[k].item() for k in metrics.keys()
-                }
-
-            # Logging
-            this_loss = loss.detach().item()
-            total_loss += this_loss
-
-            end_time = time.time()
-            duration = end_time - start_time
-            start_time = end_time
-
-            logger.info(
-                f"Val [{i}/{epoch_len}] Loss: {this_loss:.6f} Duration: {duration:.2f}s"
-            )
-            # We don't add individual loss measurements to tensorboard in the validation loop.
-
-            if cfg.profile and i >= 10:
-                break  # Stop profiling after 10 batches
-
-    avg_loss = total_loss / epoch_len
-    avg_metrics = {k: v / epoch_len for k, v in total_metrics.items()}
-    if dist_manager.rank == 0:
-        val_writer.add_scalar("epoch/loss", avg_loss, epoch)
-        for metric_name, metric_value in avg_metrics.items():
-            val_writer.add_scalar(f"epoch/{metric_name}", metric_value, epoch)
-        # Print average metrics using tabulate
-        metrics_table = tabulate(
-            [[k, v] for k, v in avg_metrics.items()],
-            headers=["Metric", "Average Value"],
-            tablefmt="pretty",
-        )
-        print(f"\nEpoch {epoch} Validation Average Metrics:\n{metrics_table}\n")
-    return avg_loss
-
-
-def update_model_params_for_fp8(cfg, logger) -> tuple | None:
-    """
-    Adjusts model configuration parameters to ensure compatibility with FP8 computations.
-
-    The output shape will be padded to a multiple of 16.  The input shape
-    is padded dynamically in the forward pass, but that is printed here
-    for information.
-
-    Args:
-        cfg: Configuration object with model and training attributes.
-        logger: Logger object for info messages.
-
-    Returns:
-        tuple: (cfg, output_pad_size) if precision is "float8", where output_pad_size is the amount
-               of padding added to the output dimension (or None if no padding was needed).
-    """
-    # we have to manipulate the output shape
-    # to enable fp8 computations with transformer_engine.
-    # need the input and output to be divisible by 16.
-    # if (cfg.model.embedding_dim + cfg.model.functional_dim) % 16 != 0:
-
-    output_pad_size = None
-    if cfg.precision == "float8":
-        if cfg.model.out_dim % 16 != 0:
-            # pad the output:
-            output_pad_size = 16 - (cfg.model.out_dim % 16)
-            cfg.model.out_dim += output_pad_size
-            logger.info(
-                f"Padding output dimension to {cfg.model.out_dim} for fp8 autocast"
-            )
-
-        # This part is informational only:
-        if (cfg.model.functional_dim + cfg.model.embedding_dim) % 16 != 0:
-            input_pad_size = 16 - (
-                (cfg.model.functional_dim + cfg.model.embedding_dim) % 16
-            )
-            cfg.model.functional_dim += input_pad_size
-            logger.info(
-                f"Padding input dimension to {cfg.model.functional_dim} and {cfg.model.embedding_dim} for fp8 autocast"
-            )
-
-    return cfg, output_pad_size
-
-
-@profile
-def main(cfg: DictConfig):
-    """Main training function
-
-    Args:
-        cfg: Hydra configuration object
-    """
-
-    DistributedManager.initialize()
-
-    # Set up distributed training
-    dist_manager = DistributedManager()
-
-    # Set up logging
-    logger = RankZeroLoggingWrapper(PythonLogger(name="training"), dist_manager)
-
-    # Set checkpoint directory - defaults to output_dir if not specified
-    checkpoint_dir = getattr(cfg, "checkpoint_dir", None)
-    if checkpoint_dir is None:
-        checkpoint_dir = cfg.output_dir
-
-    if dist_manager.rank == 0:
-        os.makedirs(cfg.output_dir, exist_ok=True)
-        os.makedirs(checkpoint_dir, exist_ok=True)
-        writer = SummaryWriter(
-            log_dir=os.path.join(
-                cfg.output_dir + "/" + cfg.run_id + "/train",
-            )
-        )
-        val_writer = SummaryWriter(
-            log_dir=os.path.join(
-                cfg.output_dir + "/" + cfg.run_id + "/val",
-            )
-        )
-    else:
-        writer = None
-        val_writer = None
-
-    logger.info(f"Config:\n{omegaconf.OmegaConf.to_yaml(cfg, resolve=True)}")
-    logger.info(f"Output directory: {cfg.output_dir}/{cfg.run_id}")
-    logger.info(f"Checkpoint directory: {checkpoint_dir}/{cfg.run_id}/checkpoints")
-
-    cfg, output_pad_size = update_model_params_for_fp8(cfg, logger)
-
-    # Set up model
-    # (Using partial convert to get lists, etc., instead of ListConfigs.)
-    model = hydra.utils.instantiate(cfg.model, _convert_="partial")
-    logger.info(f"\n{torchinfo.summary(model, verbose=0)}")
-
-    model.to(dist_manager.device)
-
-    model = torch.nn.parallel.DistributedDataParallel(
-        model,
-        device_ids=[dist_manager.local_rank],
-        output_device=dist_manager.device,
-    )
-
-    num_params = sum(p.numel() for p in model.parameters())
-    logger.info(f"Number of parameters: {num_params}")
-
-    # Load the normalization file from configured directory (defaults to current dir)
-    norm_dir = getattr(cfg.datapipe, "normalization_dir", ".")
-    if cfg.datapipe.mode == "surface" or cfg.datapipe.mode == "combined":
-        norm_file = str(Path(norm_dir) / "surface_fields_normalization.npz")
-        norm_data = np.load(norm_file)
-        surface_factors = {
-            "mean": torch.from_numpy(norm_data["mean"]).to(dist_manager.device),
-            "std": torch.from_numpy(norm_data["std"]).to(dist_manager.device),
-        }
-    else:
-        surface_factors = None
-
-    if cfg.datapipe.mode == "volume" or cfg.datapipe.mode == "combined":
-        norm_file = str(Path(norm_dir) / "volume_fields_normalization.npz")
-        norm_data = np.load(norm_file)
-        volume_factors = {
-            "mean": torch.from_numpy(norm_data["mean"]).to(dist_manager.device),
-            "std": torch.from_numpy(norm_data["std"]).to(dist_manager.device),
-        }
-    else:
-        volume_factors = None
-
-    # Training dataset
-    train_dataloader = create_transolver_dataset(
-        cfg.datapipe,
-        phase="train",
-        surface_factors=surface_factors,
-        volume_factors=volume_factors,
-    )
-
-    # Validation dataset
-
-    val_dataloader = create_transolver_dataset(
-        cfg.datapipe,
-        phase="val",
-        surface_factors=surface_factors,
-        volume_factors=volume_factors,
-    )
-
-    num_replicas = dist_manager.world_size
-    data_rank = dist_manager.rank
-
-    # Set up distributed samplers
-    train_sampler = torch.utils.data.distributed.DistributedSampler(
-        train_dataloader,
-        num_replicas=num_replicas,
-        rank=data_rank,
-        shuffle=True,
-        drop_last=True,
-    )
-
-    val_sampler = torch.utils.data.distributed.DistributedSampler(
-        val_dataloader,
-        num_replicas=num_replicas,
-        rank=data_rank,
-        shuffle=False,  # No shuffling for validation
-        drop_last=True,
-    )
-
-    muon_params = [p for p in model.parameters() if p.ndim == 2]
-    other_params = [p for p in model.parameters() if p.ndim != 2]
-
-    # Set up optimizer and scheduler
-    optimizer = hydra.utils.instantiate(cfg.training.optimizer, params=other_params)
-
-    optimizer = CombinedOptimizer(
-        optimizers=[
-            torch.optim.Muon(
-                muon_params,
-                lr=cfg.training.optimizer.lr,
-                weight_decay=cfg.training.optimizer.weight_decay,
-                adjust_lr_fn="match_rms_adamw",
-            ),
-            optimizer,
-        ],
-    )
-
-    # Set up learning rate scheduler based on config
-    scheduler_cfg = cfg.training.scheduler
-    scheduler_name = scheduler_cfg.name
-    scheduler_params = dict(scheduler_cfg.params)
-
-    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, **scheduler_params)
-
-    precision = cfg.precision
-    scaler = GradScaler() if precision == "float16" else None
-
-    if precision == "float8" and not TE_AVAILABLE:
-        raise ImportError(
-            "TransformerEngine is not installed.  Please install it to use float8 precision."
-        )
-
-    ckpt_args = {
-        "path": f"{checkpoint_dir}/{cfg.run_id}/checkpoints",
-        "optimizer": optimizer,
-        "scheduler": scheduler,
-        "models": model,
-    }
-
-    loaded_epoch = load_checkpoint(device=dist_manager.device, **ckpt_args)
-
-    if cfg.compile:
-        model = torch.compile(model)
-
-    # Training loop
-    logger.info("Starting training...")
-    for epoch in range(loaded_epoch, cfg.training.num_epochs):
-        # Set the epoch in the samplers
-        train_sampler.set_epoch(epoch)
-        val_sampler.set_epoch(epoch)
-        train_dataloader.dataset.set_indices(list(train_sampler))
-        val_dataloader.dataset.set_indices(list(val_sampler))
-
-        start_time = time.time()
-        # Training phase
-        with Profiler():
-            train_loss = train_epoch(
-                train_dataloader,
-                len(list(train_sampler)),
-                model,
-                output_pad_size,
-                optimizer,
-                scheduler,
-                logger,
-                writer,
-                epoch,
-                cfg,
-                dist_manager,
-                scaler,
-            )
-            end_time = time.time()
-            train_duration = end_time - start_time
-
-            start_time = time.time()
-            # Validation phase
-            val_loss = val_epoch(
-                val_dataloader,
-                len(list(val_sampler)),
-                model,
-                output_pad_size,
-                logger,
-                val_writer,
-                epoch,
-                cfg,
-                dist_manager,
-            )
-            end_time = time.time()
-            val_duration = end_time - start_time
-
-        # Log epoch results
-        logger.info(
-            f"Epoch [{epoch}/{cfg.training.num_epochs}] Train Loss: {train_loss:.6f} [duration: {train_duration:.2f}s] Val Loss: {val_loss:.6f} [duration: {val_duration:.2f}s]"
-        )
-
-        # save checkpoint
-        if epoch % cfg.training.save_interval == 0 and dist_manager.rank == 0:
-            save_checkpoint(**ckpt_args, epoch=epoch + 1)
-
-        if scheduler_name == "StepLR":
-            scheduler.step()
-
-    logger.info("Training completed!")
-
-
-@hydra.main(version_base=None, config_path="conf", config_name="train_surface")
-def launch(cfg: DictConfig):
-    """Launch training with hydra configuration
-
-    Args:
-        cfg: Hydra configuration object
-    """
-
-    # If you want to use `line_profiler` or PyTorch's profiler, enable them here.
-
-    profiler = Profiler()
-    if cfg.profile:
-        profiler.enable("torch")
-        profiler.enable("line_profiler")
-    profiler.initialize()
-    main(cfg)
-    profiler.finalize()
-
-
-if __name__ == "__main__":
-    launch()
diff --git a/examples/cfd/external_aerodynamics/typhon/src/utils.py b/examples/cfd/external_aerodynamics/typhon/src/utils.py
deleted file mode 100644
index a5484e9747..0000000000
--- a/examples/cfd/external_aerodynamics/typhon/src/utils.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-from collections.abc import Iterable, Sequence
-import torch
-import functools
-
-_SEQUENCE_BLOCKLIST = (torch.Tensor, str, bytes)
-
-
-def _is_tensor_sequence(x):
-    return isinstance(x, Sequence) and not isinstance(x, _SEQUENCE_BLOCKLIST)
-
-
-def _coerce_iterable(arg):
-    """
-    Normalize iterable inputs so tensorwise can unzip any sequence-like object,
-    even if it is only an iterator (e.g., zip objects of strings or constants).
-    """
-    if _is_tensor_sequence(arg):
-        return arg, True
-    if isinstance(arg, Iterable) and not isinstance(arg, _SEQUENCE_BLOCKLIST):
-        return tuple(arg), True
-    return arg, False
-
-
-def tensorwise(fn):
-    """
-    Decorator: allow fn(tensor, ...) or fn(list-of-tensors, ...).
-    If any argument is a sequence of tensors, apply fn elementwise. Non-sequence
-    iterables (zip objects, generators of strings, etc.) are automatically
-    materialized so they can participate in the elementwise zip as well.
-    All sequences must be the same length.
-    """
-
-    @functools.wraps(fn)
-    def wrapper(*args, **kwargs):
-        # Detect sequences while allowing generic iterables (e.g., zip objects)
-        normalized_args = []
-        seq_flags = []
-        for arg in args:
-            normalized_arg, is_seq = _coerce_iterable(arg)
-            normalized_args.append(normalized_arg)
-            seq_flags.append(is_seq)
-
-        normalized_kwargs = {}
-        kw_seq_flags = {}
-        for key, value in kwargs.items():
-            normalized_value, is_seq = _coerce_iterable(value)
-            normalized_kwargs[key] = normalized_value
-            kw_seq_flags[key] = is_seq
-
-        any_seq = any(seq_flags) or any(kw_seq_flags.values())
-
-        if not any_seq:
-            # Nothing is a sequence — call normally
-            return fn(*normalized_args, **normalized_kwargs)
-
-        # All sequence arguments must be sequences of the same length
-        # Collect all sequences (positional + keyword)
-        seq_lengths = {len(a) for a, flag in zip(normalized_args, seq_flags) if flag}
-        seq_lengths.update(
-            len(normalized_kwargs[k]) for k, flag in kw_seq_flags.items() if flag
-        )
-        lengths = seq_lengths
-        if len(lengths) != 1:
-            raise ValueError(
-                f"Sequence arguments must have same length; got lengths {lengths}."
-            )
-
-        L = lengths.pop()
-
-        outs = []
-        for i in range(L):
-            # Rebuild ith positional args
-            ith_args = [
-                (a[i] if is_s else a) for a, is_s in zip(normalized_args, seq_flags)
-            ]
-            # Rebuild ith keyword args
-            ith_kwargs = {
-                k: (v[i] if kw_seq_flags[k] else v)
-                for k, v in normalized_kwargs.items()
-            }
-            outs.append(fn(*ith_args, **ith_kwargs))
-
-        return outs
-
-    return wrapper
diff --git a/examples/cfd/external_aerodynamics/typhon/src/volume_fields_normalization.npz b/examples/cfd/external_aerodynamics/typhon/src/volume_fields_normalization.npz
deleted file mode 100644
index c1f0e6f463f1a4efe45af83a2eca8e0ff0c53802..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1056
zcmWIWW@gc4fB;2?4`=kB|Azt&1`&qb)Wkf!yn;$b1_6dCP*pH`vR|lgKqMnW8AG*t
zN@{U(k-C+Fx=osix{iW+T7FSUQDR<veo;y)NZc(kr#KZTUYwCwkP75$nCfWiDAX!Y
z0J!eQGTN7(nz-xi0vj7cC$oKt3j@spP<&G%-PQ>64Li^`#U&|&)iWmrPT4i?^s-Z`
zkhXgjsACt;HWSS=ryjhtM)pi@=Aez4FWg)9UzQO+uuh8c0NW<3gZ%HbW}wDQ<go=h
zD4t2I7_2eFxnIKJ&JhuZg6se7SD%lxKYmKfD!`kONtYQl+kmnK2y;NQ35aT71d*^z
zgsuyeJYl-nK?<Ruf$<!W1x>8znn1|_yQbMd%b<w@T@xs{v1`g;0&0Q;et<VC8^{GL
MKzJ2Mn=*rV05}NonE(I)

diff --git a/physicsnemo/datapipes/cae/transolver_datapipe.py b/physicsnemo/datapipes/cae/transolver_datapipe.py
index 817c6758d1..0161c3a638 100644
--- a/physicsnemo/datapipes/cae/transolver_datapipe.py
+++ b/physicsnemo/datapipes/cae/transolver_datapipe.py
@@ -209,18 +209,6 @@ def preprocess_surface_data(
 
         embeddings = torch.cat(embeddings_inputs, dim=-1)
 
-        # Build fx:
-        fx_inputs = [
-            data_dict["air_density"],
-            data_dict["stream_velocity"],
-        ]
-        fx = torch.stack(fx_inputs, dim=-1)
-
-        if self.config.broadcast_global_features:
-            fx = fx.broadcast_to(embeddings.shape[0], -1)
-        else:
-            fx = fx.unsqueeze(0)
-
         fields = data_dict["surface_fields"]
         if idx is not None:
             fields = fields[idx]
@@ -228,11 +216,30 @@ def preprocess_surface_data(
         if self.config.scaling_type is not None:
             fields = self.scale_model_targets(fields, self.config.surface_factors)
 
-        return {
-            "embeddings": embeddings,
-            "fx": fx,
-            "fields": fields,
-        }
+        if "air_density" in data_dict and "stream_velocity" in data_dict:
+            # Build fx:
+            fx_inputs = [
+                data_dict["air_density"],
+                data_dict["stream_velocity"],
+            ]
+            fx = torch.stack(fx_inputs, dim=-1)
+
+            if self.config.broadcast_global_features:
+                fx = fx.broadcast_to(embeddings.shape[0], -1)
+            else:
+                fx = fx.unsqueeze(0)
+
+            return {
+                "embeddings": embeddings,
+                "fx": fx,
+                "fields": fields,
+            }
+
+        else:
+            return {
+                "embeddings": embeddings,
+                "fields": fields,
+            }
 
     def preprocess_volume_data(
         self,
@@ -312,18 +319,6 @@ def preprocess_volume_data(
 
         embeddings = torch.cat(embeddings_inputs, dim=-1)
 
-        # Build fx:
-        fx_inputs = [
-            data_dict["air_density"],
-            data_dict["stream_velocity"],
-        ]
-        fx = torch.stack(fx_inputs, dim=-1)
-
-        if self.config.broadcast_global_features:
-            fx = fx.broadcast_to(embeddings.shape[0], -1)
-        else:
-            fx = fx.unsqueeze(0)
-
         fields = data_dict["volume_fields"]
         if idx is not None:
             fields = fields[idx]
@@ -331,11 +326,29 @@ def preprocess_volume_data(
         if self.config.scaling_type is not None:
             fields = self.scale_model_targets(fields, self.config.volume_factors)
 
-        return {
-            "embeddings": embeddings,
-            "fx": fx,
-            "fields": fields,
-        }
+        if "air_density" in data_dict and "stream_velocity" in data_dict:
+            # Build fx:
+            fx_inputs = [
+                data_dict["air_density"],
+                data_dict["stream_velocity"],
+            ]
+            fx = torch.stack(fx_inputs, dim=-1)
+
+            if self.config.broadcast_global_features:
+                fx = fx.broadcast_to(embeddings.shape[0], -1)
+            else:
+                fx = fx.unsqueeze(0)
+
+            return {
+                "embeddings": embeddings,
+                "fx": fx,
+                "fields": fields,
+            }
+        else:
+            return {
+                "embeddings": embeddings,
+                "fields": fields,
+            }
 
     def process_geometry(
         self,
@@ -564,8 +577,8 @@ def unscale_model_targets(
             field_max = factors["max"]
             fields = unnormalize(fields, field_max, field_min)
 
-        if air_density is not None and stream_velocity is not None:
-            fields = fields * air_density * stream_velocity**2
+        # if air_density is not None and stream_velocity is not None:
+        #     fields = fields * air_density * stream_velocity**2
 
         return fields
 

From 55ab1ebcf15b5b17b32a505b62d31177a0244ded Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 10 Dec 2025 16:27:24 +0000
Subject: [PATCH 18/32] Refactor typhon to improve readability and
 maintainability of BW path

---
 .../models/typhon/context_projector.py        | 559 ++++++++++++++
 .../experimental/models/typhon/gale.py        | 340 +++++++++
 .../experimental/models/typhon/typhon.py      | 684 +-----------------
 test/models/typhon/test_gale.py               |   2 +-
 test/models/typhon/test_typhon.py             |  79 +-
 5 files changed, 990 insertions(+), 674 deletions(-)
 create mode 100644 physicsnemo/experimental/models/typhon/context_projector.py
 create mode 100644 physicsnemo/experimental/models/typhon/gale.py

diff --git a/physicsnemo/experimental/models/typhon/context_projector.py b/physicsnemo/experimental/models/typhon/context_projector.py
new file mode 100644
index 0000000000..26a4ce5ae3
--- /dev/null
+++ b/physicsnemo/experimental/models/typhon/context_projector.py
@@ -0,0 +1,559 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Context Projector for Typhon model.
+
+This module provides the ContextProjector class, which projects context features
+(geometry or global embeddings) onto learned physical state spaces for use in
+GALE attention layers.
+"""
+
+import torch
+import torch.nn as nn
+from einops import rearrange
+
+from physicsnemo.utils.version_check import check_min_version
+from physicsnemo.models.transolver.Physics_Attention import gumbel_softmax
+
+# Check optional dependency availability
+TE_AVAILABLE = check_min_version("transformer_engine", "0.1.0", hard_fail=False)
+if TE_AVAILABLE:
+    import transformer_engine.pytorch as te
+
+
+class ContextProjector(nn.Module):
+    r"""Projects context features onto physical state space.
+
+    This context projector is conceptually similar to half of a GALE attention layer.
+    It projects context values (geometry or global embeddings) onto a learned physical
+    state space, but unlike a full attention layer, it never projects back to the
+    original space. The projected features are used as context in all GALE blocks
+    of the Typhon model.
+
+    Parameters
+    ----------
+    dim : int
+        Input dimension of the context features.
+    heads : int, optional
+        Number of projection heads. Default is 8.
+    dim_head : int, optional
+        Dimension of each projection head. Default is 64.
+    dropout : float, optional
+        Dropout rate (layer is created but not applied here). Default is 0.0.
+    slice_num : int, optional
+        Number of learned physical state slices. Default is 64.
+    use_te : bool, optional
+        Whether to use Transformer Engine backend when available. Default is True.
+    plus : bool, optional
+        Whether to use Transolver++ features. Default is False.
+
+    Notes
+    -----
+    The global features are reused in all blocks of the model, so the learned
+    projections must capture globally useful features rather than layer-specific ones.
+
+    See Also
+    --------
+    :class:`GALE` : Full GALE attention layer that uses these projected context features.
+    :class:`Typhon` : Main model that uses ContextProjector for geometry and global embeddings.
+    """
+
+    def __init__(
+        self,
+        dim: int,
+        heads: int = 8,
+        dim_head: int = 64,
+        dropout: float = 0.0,
+        slice_num: int = 64,
+        use_te: bool = True,
+        plus: bool = False,
+    ):
+        super().__init__()
+        inner_dim = dim_head * heads
+        self.dim_head = dim_head
+        self.heads = heads
+        self.plus = plus
+        self.scale = dim_head**-0.5
+        self.use_te = use_te
+
+        # Choose linear layer implementation based on backend
+        linear_layer = te.Linear if (use_te and TE_AVAILABLE) else nn.Linear
+
+        # Input projection layers
+        self.in_project_x = linear_layer(dim, inner_dim)
+        if not plus:
+            self.in_project_fx = linear_layer(dim, inner_dim)
+
+        # Attention components (softmax/dropout are unused by this class's methods)
+        self.softmax = nn.Softmax(dim=-1)
+        self.dropout = nn.Dropout(dropout)
+        self.temperature = nn.Parameter(torch.ones([1, heads, 1, 1]) * 0.5)
+
+        # Transolver++ adaptive temperature projection
+        if plus:
+            self.proj_temperature = nn.Sequential(
+                linear_layer(self.dim_head, slice_num),
+                nn.GELU(),
+                linear_layer(slice_num, 1),
+                nn.GELU(),
+            )
+
+        # Slice projection layer
+        self.in_project_slice = linear_layer(dim_head, slice_num)
+
+    def project_input_onto_slices(
+        self, x: torch.Tensor
+    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        r"""Project the input onto the slice space.
+
+        Parameters
+        ----------
+        x : torch.Tensor
+            Input tensor of shape \((B, N, C)\) where \(B\) is batch size,
+            \(N\) is number of tokens, and \(C\) is number of channels.
+
+        Returns
+        -------
+        torch.Tensor or tuple[torch.Tensor, torch.Tensor]
+            If ``plus=True``, returns single tensor of shape \((B, H, N, D)\) where
+            \(H\) is number of heads and \(D\) is head dimension. If ``plus=False``,
+            returns tuple of two tensors both of shape \((B, H, N, D)\): the outputs
+            of ``in_project_x`` and ``in_project_fx`` respectively.
+        """
+        # Project input to multi-head representation
+        projected_x = rearrange(
+            self.in_project_x(x), "B N (h d) -> B h N d", h=self.heads, d=self.dim_head
+        )
+
+        if self.plus:
+            # Transolver++ uses single projection
+            return projected_x
+        else:
+            # Standard Transolver adds a second, separate projection of the same input
+            feature_projection = rearrange(
+                self.in_project_fx(x),
+                "B N (h d) -> B h N d",
+                h=self.heads,
+                d=self.dim_head,
+            )
+            return projected_x, feature_projection
+
+    def compute_slices_from_projections(
+        self, slice_projections: torch.Tensor, fx: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        r"""Compute slice weights and slice tokens from input projections and latent features.
+
+        Parameters
+        ----------
+        slice_projections : torch.Tensor
+            Projected input tensor of shape \((B, H, N, S)\) where \(B\) is batch size,
+            \(H\) is number of heads, \(N\) is number of tokens, and \(S\) is number of
+            slices, representing the projection of each token onto each slice for each
+            attention head.
+        fx : torch.Tensor
+            Latent feature tensor of shape \((B, H, N, D)\) where \(D\) is head dimension,
+            representing the learned states to be aggregated by the slice weights.
+
+        Returns
+        -------
+        tuple[torch.Tensor, torch.Tensor]
+            - ``slice_weights``: Tensor of shape \((B, H, N, S)\), normalized weights for
+              each slice per token and head.
+            - ``slice_token``: Tensor of shape \((B, H, S, D)\), aggregated latent features
+              for each slice, head, and batch.
+
+        Notes
+        -----
+        The function computes a temperature-scaled softmax over the slice projections to
+        obtain slice weights, then aggregates the latent features for each slice using
+        these weights, which are first divided by their per-slice sum over tokens (plus
+        ``1e-2``) for numerical stability.
+        """
+        # Compute temperature-adjusted softmax weights
+        if self.plus:
+            # Transolver++ uses adaptive temperature with Gumbel softmax
+            temperature = self.temperature + self.proj_temperature(fx)
+            clamped_temp = torch.clamp(temperature, min=0.01).to(
+                slice_projections.dtype
+            )
+            slice_weights = gumbel_softmax(slice_projections, clamped_temp)
+        else:
+            # Standard Transolver: learned per-head temperature, clamped to [0.5, 5]
+            clamped_temp = torch.clamp(self.temperature, min=0.5, max=5).to(
+                slice_projections.dtype
+            )
+            slice_weights = nn.functional.softmax(
+                slice_projections / clamped_temp, dim=-1
+            )
+
+        # Ensure weights match the computation dtype
+        slice_weights = slice_weights.to(slice_projections.dtype)
+
+        # Aggregate features by slice weights
+        # Normalize first to prevent overflow in reduced precision
+        slice_norm = slice_weights.sum(2)  # Sum over tokens: [B, H, S]
+        normed_weights = slice_weights / (slice_norm[:, :, None, :] + 1e-2)
+        slice_token = torch.matmul(normed_weights.transpose(2, 3), fx)
+
+        return slice_weights, slice_token
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        r"""Project inputs to physical state slices.
+
+        This performs a partial physics attention operation: it projects the input onto
+        learned physical state slices but does not project back to the original space.
+        The resulting slice tokens serve as context for GALE attention layers.
+
+        Parameters
+        ----------
+        x : torch.Tensor
+            Input tensor of shape \((B, N, C)\) where \(B\) is batch size, \(N\) is
+            number of tokens, and \(C\) is number of channels.
+
+        Returns
+        -------
+        torch.Tensor
+            Slice tokens of shape \((B, H, S, D)\) where \(H\) is number of heads,
+            \(S\) is number of slices, and \(D\) is head dimension.
+
+        Notes
+        -----
+        This method implements the encoding portion of the physics attention mechanism.
+        The slice tokens capture learned physical state representations that are used
+        as cross-attention context throughout the model.
+        """
+        # Project inputs onto learned latent spaces
+        if self.plus:
+            projected_x = self.project_input_onto_slices(x)
+            # Transolver++ reuses the same projection for both paths
+            feature_projection = projected_x
+        else:
+            projected_x, feature_projection = self.project_input_onto_slices(x)
+
+        # Project latent representations onto physical state slices
+        slice_projections = self.in_project_slice(projected_x)
+
+        # Compute weighted aggregation of features into slice tokens
+        _, slice_tokens = self.compute_slices_from_projections(
+            slice_projections, feature_projection
+        )
+
+        return slice_tokens
+
+
+class GeometricFeatureProcessor(nn.Module):
+    r"""Processes geometric features at a single spatial scale using BQWarp.
+
+    This is a simple, reusable component that handles neighbor querying and
+    feature processing for one radius scale. It encapsulates the BQWarp +
+    MLP pattern used throughout the model.
+
+    Parameters
+    ----------
+    radius : float
+        Query radius for neighbor search.
+    neighbors_in_radius : int
+        Maximum number of neighbors within the radius.
+    feature_dim : int
+        Dimension of the input features to query.
+    hidden_dim : int
+        Output dimension after MLP processing.
+    """
+
+    def __init__(
+        self,
+        radius: float,
+        neighbors_in_radius: int,
+        feature_dim: int,
+        hidden_dim: int,
+    ):
+        super().__init__()
+        from physicsnemo.models.layers import BQWarp, Mlp
+
+        self.bq_warp = BQWarp(radius=radius, neighbors_in_radius=neighbors_in_radius)
+        self.mlp = Mlp(
+            in_features=feature_dim * neighbors_in_radius,
+            hidden_features=[hidden_dim, hidden_dim // 2],
+            out_features=hidden_dim,
+            act_layer=nn.GELU,
+            drop=0.0,
+        )
+
+    def forward(
+        self, query_points: torch.Tensor, key_features: torch.Tensor
+    ) -> torch.Tensor:
+        r"""Query neighbors and process features.
+
+        Parameters
+        ----------
+        query_points : torch.Tensor
+            Query coordinates of shape \((B, N, 3)\).
+        key_features : torch.Tensor
+            Features to query from of shape \((B, N, C)\).
+
+        Returns
+        -------
+        torch.Tensor
+            Processed features of shape \((B, N, hidden_dim)\).
+        """
+        # BQWarp returns a pair; only its second element (the gathered
+        # neighbor tensor) is consumed here.
+        _, neighbors = self.bq_warp(query_points, key_features)
+        # Flatten the neighbor and channel axes so the MLP receives one
+        # vector per point: (b, n, k, c) -> (b, n, k * c).
+        neighbors_flat = rearrange(neighbors, "b n k c -> b n (k c)")
+        return torch.nn.functional.tanh(self.mlp(neighbors_flat))
+
+
+class MultiScaleFeatureExtractor(nn.Module):
+    r"""Multi-scale geometric feature extraction with minimal complexity.
+
+    Manages multiple GeometricFeatureProcessor instances for different radii.
+    Provides both tokenized context and concatenated local features.
+
+    Parameters
+    ----------
+    geometry_dim : int
+        Dimension of geometry features.
+    radii : list[float]
+        Radii for multi-scale processing.
+    neighbors_in_radius : list[int]
+        Neighbors per radius; must match ``radii`` in length (else ``ValueError``).
+    hidden_dim : int
+        Hidden dimension for processing.
+    n_head : int
+        Number of attention heads.
+    dim_head : int
+        Dimension per head.
+    dropout : float, optional
+        Dropout rate. Default is 0.0.
+    slice_num : int, optional
+        Number of slices. Default is 64.
+    use_te : bool, optional
+        Use Transformer Engine. Default is True.
+    plus : bool, optional
+        Use Transolver++. Default is False.
+    """
+
+    def __init__(
+        self,
+        geometry_dim: int,
+        radii: list[float],
+        neighbors_in_radius: list[int],
+        hidden_dim: int,
+        n_head: int,
+        dim_head: int,
+        dropout: float = 0.0,
+        slice_num: int = 64,
+        use_te: bool = True,
+        plus: bool = False,
+    ):
+        super().__init__()
+        if len(radii) != len(neighbors_in_radius):
+            raise ValueError("radii and neighbors_in_radius must have equal length")
+        self.num_scales = len(radii)
+
+        # One processor per (radius, neighbor-count) pair - simple and reusable
+        self.processors = nn.ModuleList(
+            [
+                GeometricFeatureProcessor(radius, k, geometry_dim, hidden_dim)
+                for radius, k in zip(radii, neighbors_in_radius)
+            ]
+        )
+
+        # One tokenizer per scale for context features (built after the processors)
+        tokenizer_args = (hidden_dim, n_head, dim_head, dropout, slice_num, use_te, plus)
+        self.tokenizers = nn.ModuleList(
+            [ContextProjector(*tokenizer_args) for _ in range(self.num_scales)]
+        )
+
+    def extract_context_features(
+        self, spatial_coords: torch.Tensor, geometry: torch.Tensor
+    ) -> list[torch.Tensor]:
+        r"""Extract and tokenize features for context."""
+        return [
+            tokenizer(processor(spatial_coords, geometry))
+            for processor, tokenizer in zip(self.processors, self.tokenizers)
+        ]
+
+    def extract_local_features(
+        self, spatial_coords: torch.Tensor, geometry: torch.Tensor
+    ) -> torch.Tensor:
+        r"""Extract and concatenate features for local pathway."""
+        # NOTE(review): arguments are passed in the opposite order from
+        # extract_context_features -- confirm this is intentional.
+        per_scale = [proc(geometry, spatial_coords) for proc in self.processors]
+        return torch.cat(per_scale, dim=-1)
+
+
+class GlobalContextBuilder(nn.Module):
+    r"""Orchestrates all context construction with a clean, simple interface.
+
+    Manages geometry tokenization, global embedding tokenization, and optional
+    multi-scale local features.
+
+    Parameters
+    ----------
+    functional_dims : tuple[int, ...]
+        Dimensions of each functional input type.
+    geometry_dim : int | None, optional
+        Geometry feature dimension. Default is None.
+    global_dim : int | None, optional
+        Global embedding dimension. Default is None.
+    radii : list[float], optional
+        Radii for local features. Default is [0.05, 0.25].
+    neighbors_in_radius : list[int], optional
+        Neighbors per radius. Default is [8, 32].
+    n_hidden_local : int, optional
+        Hidden dim for local features. Default is 32.
+    n_hidden : int, optional
+        Model hidden dimension. Default is 256.
+    n_head : int, optional
+        Number of attention heads. Default is 8.
+    dropout : float, optional
+        Dropout rate. Default is 0.0.
+    slice_num : int, optional
+        Number of slices. Default is 32.
+    use_te : bool, optional
+        Use Transformer Engine. Default is True.
+    plus : bool, optional
+        Use Transolver++. Default is False.
+    include_local_features : bool, optional
+        Enable local feature extraction. Default is False.
+    """
+
+    def __init__(
+        self,
+        functional_dims: tuple[int, ...],
+        geometry_dim: int | None = None,
+        global_dim: int | None = None,
+        radii: list[float] = [0.05, 0.25],
+        neighbors_in_radius: list[int] = [8, 32],
+        n_hidden_local: int = 32,
+        n_hidden: int = 256,
+        n_head: int = 8,
+        dropout: float = 0.0,
+        slice_num: int = 32,
+        use_te: bool = True,
+        plus: bool = False,
+        include_local_features: bool = False,
+    ) -> None:
+        super().__init__()
+        dim_head = n_hidden // n_head
+        context_dim = 0  # running total, exposed through get_context_dim()
+
+        # Multi-scale extractors for local features (one per functional dim). Each
+        # gets its own copy of the lists so the mutable defaults are never shared.
+        if geometry_dim is not None and include_local_features:
+            self.local_extractors = nn.ModuleList(
+                [
+                    MultiScaleFeatureExtractor(
+                        geometry_dim, list(radii), list(neighbors_in_radius), n_hidden_local,
+                        n_head, dim_head, dropout, slice_num, use_te, plus
+                    )
+                    for _ in functional_dims
+                ]
+            )
+            context_dim += dim_head * len(radii) * len(functional_dims)
+        else:
+            self.local_extractors = None
+
+        # Geometry tokenizer (adds dim_head to the context dimension)
+        if geometry_dim is not None:
+            self.geometry_tokenizer = ContextProjector(
+                geometry_dim, n_head, dim_head, dropout, slice_num, use_te, plus
+            )
+            context_dim += dim_head
+        else:
+            self.geometry_tokenizer = None
+
+        # Global tokenizer (adds dim_head to the context dimension)
+        if global_dim is not None:
+            self.global_tokenizer = ContextProjector(
+                global_dim, n_head, dim_head, dropout, slice_num, use_te, plus
+            )
+            context_dim += dim_head
+        else:
+            self.global_tokenizer = None
+
+        self._context_dim = context_dim
+
+    def get_context_dim(self) -> int:
+        r"""Return the total context dimension accumulated in ``__init__``."""
+        return self._context_dim
+
+    def build_context(
+        self,
+        local_embeddings: tuple[torch.Tensor, ...],
+        local_positions: tuple[torch.Tensor, ...] | None,
+        geometry: torch.Tensor | None = None,
+        global_embedding: torch.Tensor | None = None,
+    ) -> tuple[torch.Tensor | None, list[torch.Tensor] | None]:
+        r"""Build the context tensor and the per-input local features.
+
+        Parameters
+        ----------
+        local_embeddings : tuple[torch.Tensor, ...]
+            Input embeddings, each of shape \((B, N, C_i)\). Only their count is used here.
+        local_positions : tuple[torch.Tensor, ...] | None
+            Local positions, each of shape \((B, N, 3)\), used to query the neighbors. ``None`` raises ``ValueError`` when local extractors exist.
+        geometry : torch.Tensor | None, optional
+            Geometry of shape \((B, N, C_{geo})\). Default is None.
+        global_embedding : torch.Tensor | None, optional
+            Global embedding of shape \((B, N_g, C_g)\). Default is None.
+
+        Returns
+        -------
+        tuple[torch.Tensor | None, list[torch.Tensor] | None]
+            Context tensor (``None`` if no context part was produced) and local features list (``None`` if none were extracted).
+        """
+        context_parts = []
+        local_features = None
+
+        if local_positions is None and self.local_extractors is not None:
+            raise ValueError("Local positions are required if local features are enabled.")
+
+        # Extract multi-scale features if enabled
+        if self.local_extractors is not None and geometry is not None:
+            local_features = []
+            for i, _ in enumerate(local_embeddings):
+                spatial_coords = local_positions[i]  # Extract coordinates
+                extractor = self.local_extractors[i]
+                # Get tokenized context features
+                context_feats = extractor.extract_context_features(
+                    spatial_coords, geometry
+                )
+                context_parts.extend(context_feats)
+
+                # Get concatenated local features
+                local_feats = extractor.extract_local_features(
+                    spatial_coords, geometry
+                )
+                local_features.append(local_feats)
+
+        # Tokenize geometry
+        if self.geometry_tokenizer is not None and geometry is not None:
+            context_parts.append(self.geometry_tokenizer(geometry))
+
+        # Tokenize global embedding
+        if self.global_tokenizer is not None and global_embedding is not None:
+            context_parts.append(self.global_tokenizer(global_embedding))
+
+        # Concatenate all context
+        context = torch.cat(context_parts, dim=-1) if context_parts else None
+
+        return context, local_features
+
diff --git a/physicsnemo/experimental/models/typhon/gale.py b/physicsnemo/experimental/models/typhon/gale.py
new file mode 100644
index 0000000000..2e2f0087c8
--- /dev/null
+++ b/physicsnemo/experimental/models/typhon/gale.py
@@ -0,0 +1,340 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dataclasses import dataclass
+from collections.abc import Sequence
+
+import torch
+import torch.nn as nn
+from einops import rearrange
+import torch.nn.functional as F
+
+import physicsnemo  # noqa: F401 for docs
+from physicsnemo.utils.version_check import check_min_version
+from physicsnemo.models.transolver.Physics_Attention import (
+    PhysicsAttentionIrregularMesh,
+    gumbel_softmax,
+)
+from physicsnemo.models.transolver.transolver import MLP
+from physicsnemo.models.layers import BQWarp, fourier_encode, Mlp
+
+from physicsnemo.models.meta import ModelMetaData
+from physicsnemo.models.module import Module
+
+# Check optional dependency availability
+TE_AVAILABLE = check_min_version("transformer_engine", "0.1.0", hard_fail=False)
+if TE_AVAILABLE:
+    import transformer_engine.pytorch as te
+
+class GALE(PhysicsAttentionIrregularMesh):
+    r"""Geometry-Aware Latent Embeddings (GALE) attention layer.
+
+    This is an extension of the Transolver PhysicsAttention mechanism to support
+    cross-attention with a context vector, built from geometry and global embeddings.
+    GALE combines self-attention on learned physical state slices with cross-attention
+    to geometry-aware context, using a learnable mixing weight to blend the two.
+
+    Parameters
+    ----------
+    dim : int
+        Input dimension of the features.
+    heads : int, optional
+        Number of attention heads. Default is 8.
+    dim_head : int, optional
+        Dimension of each attention head. Default is 64.
+    dropout : float, optional
+        Dropout rate. Default is 0.0.
+    slice_num : int, optional
+        Number of learned physical state slices. Default is 64.
+    use_te : bool, optional
+        Whether to use Transformer Engine backend when available. Default is True.
+    plus : bool, optional
+        Whether to use Transolver++ features. Default is False.
+    context_dim : int, optional
+        Dimension of the context vector (input width of the cross-attention key/value projections). Default is 0.
+
+    Notes
+    -----
+    The mixing between self-attention and cross-attention is controlled by a learnable
+    parameter ``state_mixing``: ``sigmoid(state_mixing)`` weights the self-attention
+    output and its complement the cross-attention output (0.5 each at initialization).
+
+    See Also
+    --------
+    :class:`physicsnemo.models.transolver.Physics_Attention.PhysicsAttentionIrregularMesh` : Base physics attention class.
+    :class:`GALE_block` : Transformer block using GALE attention.
+    """
+
+    def __init__(
+        self,
+        dim: int,
+        heads: int = 8,
+        dim_head: int = 64,
+        dropout: float = 0.0,
+        slice_num: int = 64,
+        use_te: bool = True,
+        plus: bool = False,
+        context_dim: int = 0,
+    ) -> None:
+        super().__init__(dim, heads, dim_head, dropout, slice_num, use_te, plus)
+
+        linear_layer = te.Linear if self.use_te else nn.Linear
+
+        # Extra projections for cross-attention: query from slices, key/value from context.
+        self.cross_q = linear_layer(dim_head, dim_head)
+        self.cross_k = linear_layer(context_dim, dim_head)
+        self.cross_v = linear_layer(context_dim, dim_head)
+
+        # This is the learnable mixing logit between self and cross attention.
+        # It is passed through a sigmoid in ``forward``, so starting at 0.0
+        # gives an initial mixing weight of 0.5.
+        self.state_mixing = nn.Parameter(torch.tensor(0.0))
+
+    def compute_slice_attention_cross(
+        self, slice_tokens: Sequence[torch.Tensor], context: torch.Tensor
+    ) -> tuple[torch.Tensor, ...]:
+        r"""Compute cross-attention between slice tokens and context.
+
+        Parameters
+        ----------
+        slice_tokens : Sequence[torch.Tensor]
+            Slice tokens, each of shape \((B, H, N, D)\) where \(B\) is batch size, \(H\) is number of heads, \(N\) is number of slices, and \(D\) is head dimension. They are concatenated along the slice axis to form the query.
+        context : torch.Tensor
+            Context tensor of shape \((B, H, N_c, D_c)\) where \(N_c\) is number of context slices and \(D_c\) is context dimension.
+
+        Returns
+        -------
+        tuple[torch.Tensor, ...]
+            Cross-attention output split along the slice axis into chunks sized by the first input's slice count, each of shape \((B, H, N, D)\).
+        """
+
+        # Project the slice and context tokens:
+
+        q_input = torch.cat(slice_tokens, dim=-2)
+        q = self.cross_q(q_input)
+
+        k = self.cross_k(context)
+        v = self.cross_v(context)
+
+        # Compute the attention:
+        if self.use_te:
+            q = rearrange(q, "b h s d -> b s h d")
+            k = rearrange(k, "b h s d -> b s h d")
+            v = rearrange(v, "b h s d -> b s h d")
+            cross_attention = self.attn_fn(q, k, v)
+            cross_attention = rearrange(
+                cross_attention, "b s (h d) -> b h s d", h=self.heads, d=self.dim_head
+            )
+        else:
+            cross_attention = torch.nn.functional.scaled_dot_product_attention(
+                q, k, v, is_causal=False
+            )
+        # Undo the concatenation: chunks are sized by the first input's slice count.
+        cross_attention = torch.split(cross_attention, slice_tokens[0].shape[-2], dim=-2)
+
+        return cross_attention
+
+    def forward(
+        self, x: Sequence[torch.Tensor], context: torch.Tensor | None = None
+    ) -> list[torch.Tensor]:
+        r"""Forward pass of the GALE module.
+
+        Parameters
+        ----------
+        x : Sequence[torch.Tensor]
+            Input tensors, each of shape \((B, N, C)\) where \(B\) is batch size, \(N\) is number of tokens, and \(C\) is number of channels.
+        context : torch.Tensor | None, optional
+            Context tensor for cross-attention of shape \((B, H, S_c, D_c)\) where \(H\) is number of heads, \(S_c\) is number of context slices, and \(D_c\) is context dimension. If None, only self-attention is applied. Default is None.
+
+        Returns
+        -------
+        list[torch.Tensor]
+            Output tensors, one per input, each of shape \((B, N, C)\), same shape as its input.
+        """
+        # Project the inputs onto learned spaces:
+        if self.plus:
+            x_mid = [self.project_input_onto_slices(_x) for _x in x]
+            # In transolver ++, fx_mid is gone.
+            # x_mid is used to compute the projections instead:
+            fx_mid = list(x_mid)
+        else:
+            x_mid, fx_mid = zip(*[self.project_input_onto_slices(_x) for _x in x])
+
+        # Perform the linear projection of learned latent space onto slices:
+        slice_projections = [self.in_project_slice(_x_mid) for _x_mid in x_mid]
+
+        # Slice projections has shape [B, N_head, N_tokens, Head_dim], but head_dim may have changed!
+        # Use the slice projections and learned spaces to compute the slices, and their weights:
+        slice_weights, slice_tokens = zip(*[self.compute_slices_from_projections(proj, _fx_mid) for proj, _fx_mid in zip(slice_projections, fx_mid)])
+        # slice_weights has shape [Batch, N_heads, N_tokens, Slice_num]
+        # slice_tokens has shape  [Batch, N_heads, N_tokens, head_dim]
+        # Apply attention to the slice tokens
+        if self.use_te:
+            self_slice_token = [self.compute_slice_attention_te(_slice_token) for _slice_token in slice_tokens]
+        else:
+            self_slice_token = [self.compute_slice_attention_sdpa(_slice_token) for _slice_token in slice_tokens]
+
+        # HERE, we are differing: apply cross-attention with physical states:
+        if context is not None:
+            # Each input's slice tokens attend to the shared context in their own
+            # call (not concatenated into one query); the helper returns a tuple
+            # of chunks, so the single result is taken with ``[0]``.
+            cross_slice_token = [
+                self.compute_slice_attention_cross([_slice_token], context)[0]
+                for _slice_token in slice_tokens
+            ]
+            # Apply learnable mixing:
+            mixing_weight = torch.sigmoid(self.state_mixing)
+            out_slice_token = [
+                mixing_weight * sst + (1 - mixing_weight) * cst
+                for sst, cst in zip(self_slice_token, cross_slice_token)
+            ]
+        else:
+            # Just keep self attention:
+            out_slice_token = self_slice_token
+
+        # Shape unchanged
+
+        # Deslice:
+        outputs = [
+            self.project_attention_outputs(ost, sw) for ost, sw in zip(out_slice_token, slice_weights)
+        ]
+
+        # Outputs now has the same shape as the original input x
+
+        return outputs
+
+
+class GALE_block(nn.Module):
+    r"""Transformer encoder block using GALE attention.
+
+    This block replaces standard self-attention with the GALE (Geometry-Aware Latent
+    Embeddings) attention mechanism, which combines physics-aware self-attention with
+    cross-attention to geometry and global context.
+
+    Parameters
+    ----------
+    num_heads : int
+        Number of attention heads.
+    hidden_dim : int
+        Hidden dimension of the transformer.
+    dropout : float
+        Dropout rate.
+    act : str, optional
+        Activation function name (only passed to the non-TE MLP). Default is "gelu".
+    mlp_ratio : int, optional
+        Ratio of MLP hidden dimension to ``hidden_dim``. Default is 4.
+    last_layer : bool, optional
+        Whether this is the last layer in the model (stored as ``self.last_layer`` only). Default is False.
+    out_dim : int, optional
+        Output dimension; accepted but not used by this block. Default is 1.
+    slice_num : int, optional
+        Number of learned physical state slices. Default is 32.
+    use_te : bool, optional
+        Whether to use Transformer Engine backend. Default is True.
+    plus : bool, optional
+        Whether to use Transolver++ features. Default is False.
+    context_dim : int, optional
+        Dimension of the context vector for cross-attention. Default is 0.
+
+    Notes
+    -----
+    The block layer-normalizes the inputs, adds the attention output to those
+    *normalized* inputs, then applies a LayerNorm + MLP stage with its own residual.
+    """
+
+    def __init__(
+        self,
+        num_heads: int,
+        hidden_dim: int,
+        dropout: float,
+        act: str = "gelu",
+        mlp_ratio: int = 4,
+        last_layer: bool = False,
+        out_dim: int = 1,
+        slice_num: int = 32,
+        use_te: bool = True,
+        plus: bool = False,
+        context_dim: int = 0,
+    ) -> None:
+        super().__init__()
+
+        if use_te and not TE_AVAILABLE:
+            raise ImportError(
+                "Transformer Engine is not installed. Please install it with: pip install transformer-engine>=0.1.0"
+            )
+
+        self.last_layer = last_layer
+        if use_te:
+            self.ln_1 = te.LayerNorm(hidden_dim)
+        else:
+            self.ln_1 = nn.LayerNorm(hidden_dim)
+
+        self.Attn = GALE(
+            hidden_dim,
+            heads=num_heads,
+            dim_head=hidden_dim // num_heads,
+            dropout=dropout,
+            slice_num=slice_num,
+            use_te=use_te,
+            plus=plus,
+            context_dim=context_dim,
+        )
+
+        if use_te:
+            self.ln_mlp1 = te.LayerNormMLP(
+                hidden_size=hidden_dim,
+                ffn_hidden_size=hidden_dim * mlp_ratio,
+            )
+        else:
+            self.ln_mlp1 = nn.Sequential(
+                nn.LayerNorm(hidden_dim),
+                MLP(
+                    hidden_dim,
+                    hidden_dim * mlp_ratio,
+                    hidden_dim,
+                    n_layers=0,
+                    res=False,
+                    act=act,
+                    use_te=False,
+                ),
+            )
+
+    def forward(self, fx: Sequence[torch.Tensor], global_context: torch.Tensor | None) -> list[torch.Tensor]:
+        r"""Forward pass of the GALE block.
+
+        Parameters
+        ----------
+        fx : Sequence[torch.Tensor]
+            Input tensors, each of shape \((B, N, C)\) where \(B\) is batch size, \(N\) is number of tokens, and \(C\) is hidden dimension.
+        global_context : torch.Tensor | None
+            Global context tensor for cross-attention of shape \((B, H, S_c, D_c)\) where \(H\) is number of heads, \(S_c\) is number of context slices, and \(D_c\) is context dimension. Passed unchanged to the GALE attention layer.
+
+        Returns
+        -------
+        list[torch.Tensor]
+            Output tensors, one per input, each of shape \((B, N, C)\), same shape as its input.
+        """
+        # Pre-normalize every input, then run GALE attention on the normalized tensors.
+        normed_inputs = [self.ln_1(_fx) for _fx in fx]
+        attn = self.Attn(normed_inputs, global_context)
+        # NOTE(review): the residual adds the *normalized* inputs, not the raw ``fx``
+        # -- confirm this departure from the usual pre-norm residual is intended.
+        fx = [_attn + _normed for _attn, _normed in zip(attn, normed_inputs)]
+        fx = [self.ln_mlp1(_fx) + _fx for _fx in fx]
+
+        return fx
+
diff --git a/physicsnemo/experimental/models/typhon/typhon.py b/physicsnemo/experimental/models/typhon/typhon.py
index f5641e3d61..84d61467cd 100644
--- a/physicsnemo/experimental/models/typhon/typhon.py
+++ b/physicsnemo/experimental/models/typhon/typhon.py
@@ -19,336 +19,22 @@
 
 import torch
 import torch.nn as nn
-from einops import rearrange
-import torch.nn.functional as F
 
 import physicsnemo  # noqa: F401 for docs
 from physicsnemo.utils.version_check import check_min_version
-from physicsnemo.models.transolver.Physics_Attention import (
-    PhysicsAttentionIrregularMesh,
-    gumbel_softmax,
-)
 from physicsnemo.models.transolver.transolver import MLP
-from physicsnemo.models.layers import BQWarp, fourier_encode, Mlp
 
 from physicsnemo.models.meta import ModelMetaData
 from physicsnemo.models.module import Module
 
+from .context_projector import ContextProjector, GlobalContextBuilder
+from .gale import GALE_block
+
 # Check optional dependency availability
 TE_AVAILABLE = check_min_version("transformer_engine", "0.1.0", hard_fail=False)
 if TE_AVAILABLE:
     import transformer_engine.pytorch as te
 
-ACTIVATION = {
-    "gelu": nn.GELU,
-    "tanh": nn.Tanh,
-    "sigmoid": nn.Sigmoid,
-    "relu": nn.ReLU,
-    "leaky_relu": nn.LeakyReLU(0.1),
-    "softplus": nn.Softplus,
-    "ELU": nn.ELU,
-    "silu": nn.SiLU,
-}
-
-
-class GALE(PhysicsAttentionIrregularMesh):
-    r"""Geometry-Aware Latent Embeddings (GALE) attention layer.
-
-    This is an extension of the Transolver PhysicsAttention mechanism to support
-    cross-attention with a context vector, built from geometry and global embeddings.
-    GALE combines self-attention on learned physical state slices with cross-attention
-    to geometry-aware context, using a learnable mixing weight to blend the two.
-
-    Parameters
-    ----------
-    dim : int
-        Input dimension of the features.
-    heads : int, optional
-        Number of attention heads. Default is 8.
-    dim_head : int, optional
-        Dimension of each attention head. Default is 64.
-    dropout : float, optional
-        Dropout rate. Default is 0.0.
-    slice_num : int, optional
-        Number of learned physical state slices. Default is 64.
-    use_te : bool, optional
-        Whether to use Transformer Engine backend when available. Default is True.
-    plus : bool, optional
-        Whether to use Transolver++ features. Default is False.
-    context_dim : int, optional
-        Dimension of the context vector for cross-attention. Default is 0.
-
-    Notes
-    -----
-    The mixing between self-attention and cross-attention is controlled by a learnable
-    parameter ``state_mixing`` which is passed through a sigmoid function to ensure
-    the mixing weight stays in \([0, 1]\).
-
-    See Also
-    --------
-    :class:`physicsnemo.models.transolver.Physics_Attention.PhysicsAttentionIrregularMesh` : Base physics attention class.
-    :class:`GALE_block` : Transformer block using GALE attention.
-    """
-
-    def __init__(
-        self,
-        dim,
-        heads: int = 8,
-        dim_head: int = 64,
-        dropout: float = 0.0,
-        slice_num: int = 64,
-        use_te: bool = True,
-        plus: bool = False,
-        context_dim: int = 0,
-    ):
-        super().__init__(dim, heads, dim_head, dropout, slice_num, use_te, plus)
-
-        linear_layer = te.Linear if self.use_te else nn.Linear
-
-        # We have additional parameters, here:
-        self.cross_q = linear_layer(dim_head, dim_head)
-        self.cross_k = linear_layer(context_dim, dim_head)
-        self.cross_v = linear_layer(context_dim, dim_head)
-
-        # This is the learnable mixing weight between self and cross attention.
-        # We start near 0.0 since it is passed through a sigmoid to keep the
-        # mixing weight between 0 and 1.
-        self.state_mixing = nn.Parameter(torch.tensor(0.0))
-
-    def compute_slice_attention_cross(
-        self, slice_tokens: torch.Tensor, context: torch.Tensor
-    ) -> torch.Tensor:
-        r"""Compute cross-attention between slice tokens and context.
-
-        Parameters
-        ----------
-        slice_tokens : torch.Tensor
-            Slice tokens of shape \((B, H, N, D)\) where \(B\) is batch size, \(H\) is number of heads, \(N\) is number of slices, and \(D\) is head dimension.
-        context : torch.Tensor
-            Context tensor of shape \((B, H, N_c, D_c)\) where \(N_c\) is number of context slices and \(D_c\) is context dimension.
-
-        Returns
-        -------
-        torch.Tensor
-            Cross-attention output of shape \((B, H, N, D)\).
-        """
-
-        # Project the slice and context tokens:
-
-        q_input = torch.cat(slice_tokens, dim=-2)
-        q = self.cross_q(q_input)
-        
-        k = self.cross_k(context)
-        v = self.cross_v(context)
-
-        # Compute the attention:
-        if self.use_te:
-            q = rearrange(q, "b h s d -> b s h d")
-            k = rearrange(k, "b h s d -> b s h d")
-            v = rearrange(v, "b h s d -> b s h d")
-            cross_attention = self.attn_fn(q, k, v)
-            cross_attention = rearrange(
-                cross_attention, "b s (h d) -> b h s d", h=self.heads, d=self.dim_head
-            )
-        else:
-            cross_attention = torch.nn.functional.scaled_dot_product_attention(
-                q, k, v, is_causal=False
-            )
-        cross_attention = torch.split(cross_attention, slice_tokens[0].shape[-2], dim=-2)
-
-
-        return cross_attention
-
-    def forward(
-        self, x: tuple[torch.Tensor, ...], context: tuple[torch.Tensor, ...] | None = None
-    ) -> torch.Tensor:
-        r"""Forward pass of the GALE module.
-
-        Parameters
-        ----------
-        x : torch.Tensor
-            Input tensor of shape \((B, N, C)\) where \(B\) is batch size, \(N\) is number of tokens, and \(C\) is number of channels.
-        context : torch.Tensor, optional
-            Context tensor for cross-attention of shape \((B, H, S_c, D_c)\) where \(H\) is number of heads, \(S_c\) is number of context slices, and \(D_c\) is context dimension. If None, only self-attention is applied. Default is None.
-
-        Returns
-        -------
-        torch.Tensor
-            Output tensor of shape \((B, N, C)\), same shape as input.
-        """
-        # Project the inputs onto learned spaces:
-        if self.plus:
-            x_mid = [ self.project_input_onto_slices(_x) for _x in x ]
-            # In transolver ++, fx_mid is gone.
-            # x_mid is used to compute the projections instead:
-            fx_mid = [ _x_mid for _x_mid in x_mid ]
-        else:
-            x_mid, fx_mid = zip(*[ self.project_input_onto_slices(_x) for _x in x ])
-
-        # Perform the linear projection of learned latent space onto slices:
-        slice_projections = [ self.in_project_slice(_x_mid) for _x_mid in x_mid ]
-
-        # Slice projections has shape [B, N_head, N_tokens, Head_dim], but head_dim may have changed!
-        # Use the slice projections and learned spaces to compute the slices, and their weights:
-        slice_weights, slice_tokens = zip(*[self.compute_slices_from_projections(proj, _fx_mid) for proj, _fx_mid in zip(slice_projections, fx_mid)])
-        # slice_weights has shape [Batch, N_heads, N_tokens, Slice_num]
-        # slice_tokens has shape  [Batch, N_heads, N_tokens, head_dim]
-        # Apply attention to the slice tokens
-        if self.use_te:
-            self_slice_token = [ self.compute_slice_attention_te(_slice_token) for _slice_token in slice_tokens ]
-        else:
-            self_slice_token = [ self.compute_slice_attention_sdpa(_slice_token) for _slice_token in slice_tokens ]
-        
-        # HERE, we are differing: apply cross-attention with physical states:
-        if context is not None:
-            # cross_slice_token = self.compute_slice_attention_cross(
-            #     slice_tokens, context
-            # )
-            cross_slice_token = [ self.compute_slice_attention_cross([_slice_token], context)[0] 
-                for _slice_token in slice_tokens 
-            ]
-            
-            # Apply learnable mixing:
-            mixing_weight = torch.sigmoid(self.state_mixing)
-            out_slice_token = [ mixing_weight * sst + (1 - mixing_weight) * cst
-                for sst, cst in zip(self_slice_token, cross_slice_token)
-            ]
-
-        else:
-            # Just keep self attention:
-            out_slice_token = self_slice_token
-
-        # Shape unchanged
-
-        # Deslice:
-        outputs = [
-            self.project_attention_outputs(ost, sw) for ost, sw in zip(out_slice_token, slice_weights)
-        ]
-
-        # Outputs now has the same shape as the original input x
-
-        return outputs
-
-
-class GALE_block(nn.Module):
-    r"""Transformer encoder block using GALE attention.
-
-    This block replaces standard self-attention with the GALE (Geometry-Aware Latent
-    Embeddings) attention mechanism, which combines physics-aware self-attention with
-    cross-attention to geometry and global context.
-
-    Parameters
-    ----------
-    num_heads : int
-        Number of attention heads.
-    hidden_dim : int
-        Hidden dimension of the transformer.
-    dropout : float
-        Dropout rate.
-    act : str, optional
-        Activation function name. Default is "gelu".
-    mlp_ratio : int, optional
-        Ratio of MLP hidden dimension to ``hidden_dim``. Default is 4.
-    last_layer : bool, optional
-        Whether this is the last layer in the model. Default is False.
-    out_dim : int, optional
-        Output dimension (only used if ``last_layer=True``). Default is 1.
-    slice_num : int, optional
-        Number of learned physical state slices. Default is 32.
-    use_te : bool, optional
-        Whether to use Transformer Engine backend. Default is True.
-    plus : bool, optional
-        Whether to use Transolver++ features. Default is False.
-    context_dim : int, optional
-        Dimension of the context vector for cross-attention. Default is 0.
-
-    Notes
-    -----
-    The block applies layer normalization before the attention operation and uses
-    residual connections after both the attention and MLP layers.
-    """
-
-    def __init__(
-        self,
-        num_heads: int,
-        hidden_dim: int,
-        dropout: float,
-        act="gelu",
-        mlp_ratio=4,
-        last_layer=False,
-        out_dim=1,
-        slice_num=32,
-        use_te=True,
-        plus: bool = False,
-        context_dim: int = 0,
-    ):
-        super().__init__()
-
-        if use_te and not TE_AVAILABLE:
-            raise ImportError(
-                "Transformer Engine is not installed. Please install it with: pip install transformer-engine>=0.1.0"
-            )
-
-        self.last_layer = last_layer
-        if use_te:
-            self.ln_1 = te.LayerNorm(hidden_dim)
-        else:
-            self.ln_1 = nn.LayerNorm(hidden_dim)
-
-        self.Attn = GALE(
-            hidden_dim,
-            heads=num_heads,
-            dim_head=hidden_dim // num_heads,
-            dropout=dropout,
-            slice_num=slice_num,
-            use_te=use_te,
-            plus=plus,
-            context_dim=context_dim,
-        )
-
-        if use_te:
-            self.ln_mlp1 = te.LayerNormMLP(
-                hidden_size=hidden_dim,
-                ffn_hidden_size=hidden_dim * mlp_ratio,
-            )
-        else:
-            self.ln_mlp1 = nn.Sequential(
-                nn.LayerNorm(hidden_dim),
-                MLP(
-                    hidden_dim,
-                    hidden_dim * mlp_ratio,
-                    hidden_dim,
-                    n_layers=0,
-                    res=False,
-                    act=act,
-                    use_te=False,
-                ),
-            )
-
-    def forward(self, fx: tuple[torch.Tensor, ...], global_context: tuple[torch.Tensor, ...]) -> torch.Tensor:
-        r"""Forward pass of the GALE block.
-
-        Parameters
-        ----------
-        fx : torch.Tensor
-            Input tensor of shape \((B, N, C)\) where \(B\) is batch size, \(N\) is number of tokens, and \(C\) is hidden dimension.
-        global_context : torch.Tensor
-            Global context tensor for cross-attention of shape \((B, H, S_c, D_c)\) where \(H\) is number of heads, \(S_c\) is number of context slices, and \(D_c\) is context dimension.
-
-        Returns
-        -------
-        torch.Tensor
-            Output tensor of shape \((B, N, C)\), same shape as input.
-        """
-        
-        normed_inputs = [ self.ln_1(_fx) for _fx in fx ]
-        attn = self.Attn(normed_inputs, global_context)
-        
-        fx = [ attn[i] + normed_inputs[i] for i in range(len(normed_inputs)) ]
-        
-        fx = [ self.ln_mlp1(_fx) + _fx for _fx in fx ]
-
-        return fx
 
 
 @dataclass
@@ -372,220 +58,6 @@ class TyphonMetaData(ModelMetaData):
     auto_grad: bool = False
 
 
-class ContextProjector(nn.Module):
-    r"""Projects context features onto physical state space.
-
-    This context projector is conceptually similar to half of a GALE attention layer.
-    It projects context values (geometry or global embeddings) onto a learned physical
-    state space, but unlike a full attention layer, it never projects back to the
-    original space. The projected features are used as context in all GALE blocks
-    of the Typhon model.
-
-    Parameters
-    ----------
-    dim : int
-        Input dimension of the context features.
-    heads : int, optional
-        Number of projection heads. Default is 8.
-    dim_head : int, optional
-        Dimension of each projection head. Default is 64.
-    dropout : float, optional
-        Dropout rate. Default is 0.0.
-    slice_num : int, optional
-        Number of learned physical state slices. Default is 64.
-    use_te : bool, optional
-        Whether to use Transformer Engine backend when available. Default is True.
-    plus : bool, optional
-        Whether to use Transolver++ features. Default is False.
-
-    Notes
-    -----
-    The global features are reused in all blocks of the model, so the learned
-    projections must capture globally useful features rather than layer-specific ones.
-
-    See Also
-    --------
-    :class:`GALE` : Full GALE attention layer that uses these projected context features.
-    :class:`Typhon` : Main model that uses ContextProjector for geometry and global embeddings.
-    """
-
-    def __init__(
-        self,
-        dim,
-        heads: int = 8,
-        dim_head: int = 64,
-        dropout: float = 0.0,
-        slice_num: int = 64,
-        use_te: bool = True,
-        plus: bool = False,
-    ):
-        super().__init__()
-        inner_dim = dim_head * heads
-        self.dim_head = dim_head
-        self.heads = heads
-        self.plus = plus
-        self.scale = dim_head**-0.5
-        self.use_te = use_te
-
-        # Keep below here:
-        if use_te:
-            self.in_project_x = te.Linear(dim, inner_dim)
-            if not plus:
-                self.in_project_fx = te.Linear(dim, inner_dim)
-        else:
-            self.in_project_x = nn.Linear(dim, inner_dim)
-            if not plus:
-                self.in_project_fx = nn.Linear(dim, inner_dim)
-
-        self.softmax = nn.Softmax(dim=-1)
-        self.dropout = nn.Dropout(dropout)
-        self.temperature = nn.Parameter(torch.ones([1, heads, 1, 1]) * 0.5)
-
-        if plus:
-            linear_layer = te.Linear if self.use_te else nn.Linear
-            self.proj_temperature = torch.nn.Sequential(
-                linear_layer(self.dim_head, slice_num),
-                nn.GELU(),
-                linear_layer(slice_num, 1),
-                nn.GELU(),
-            )
-
-        if self.use_te:
-            self.in_project_slice = te.Linear(dim_head, slice_num)
-        else:
-            self.in_project_slice = nn.Linear(dim_head, slice_num)
-
-    def project_input_onto_slices(
-        self, x: torch.Tensor
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        r"""Project the input onto the slice space.
-
-        Parameters
-        ----------
-        x : torch.Tensor
-            Input tensor of shape \((B, N, C)\) where \(B\) is batch size, \(N\) is number of tokens, and \(C\) is number of channels.
-
-        Returns
-        -------
-        torch.Tensor or tuple[torch.Tensor, torch.Tensor]
-            If ``plus=True``, returns single tensor ``x_mid`` of shape \((B, H, N, D)\) where \(H\) is number of heads and \(D\) is head dimension. If ``plus=False``, returns tuple ``(x_mid, fx_mid)`` both of shape \((B, H, N, D)\).
-        """
-        x_mid = rearrange(
-            self.in_project_x(x), "B N (h d) -> B h N d", h=self.heads, d=self.dim_head
-        )
-        if self.plus:
-            return x_mid
-        else:
-            fx_mid = rearrange(
-                self.in_project_fx(x),
-                "B N (h d) -> B h N d",
-                h=self.heads,
-                d=self.dim_head,
-            )
-
-            return x_mid, fx_mid
-
-    def compute_slices_from_projections(
-        self, slice_projections: torch.Tensor, fx: torch.Tensor
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        r"""Compute slice weights and slice tokens from input projections and latent features.
-
-        Parameters
-        ----------
-        slice_projections : torch.Tensor
-            Projected input tensor of shape \((B, N, H, S)\) where \(B\) is batch size, \(H\) is number of heads, \(N\) is number of tokens, and \(S\) is number of slices, representing the projection of each token onto each slice for each attention head.
-        fx : torch.Tensor
-            Latent feature tensor of shape \((B, N, H, D)\) where \(D\) is head dimension, representing the learned states to be aggregated by the slice weights.
-
-        Returns
-        -------
-        tuple[torch.Tensor, torch.Tensor]
-            - ``slice_weights``: Tensor of shape \((B, N, H, S)\), representing the normalized weights for each slice per token and head.
-            - ``slice_token``: Tensor of shape \((B, H, S, D)\), representing the aggregated latent features for each slice, head, and batch.
-
-        Notes
-        -----
-        The function computes a temperature-scaled softmax over the slice projections to obtain
-        slice weights, then aggregates the latent features for each slice using these weights.
-        The aggregated features are normalized by the sum of weights for numerical stability.
-        """
-
-        # Project the latent space vectors on to the weight computation space,
-        # and compute a temperature adjusted softmax.
-
-        if self.plus:
-            temperature = self.temperature + self.proj_temperature(fx)
-            clamped_temp = torch.clamp(temperature, min=0.01).to(
-                slice_projections.dtype
-            )
-            slice_weights = gumbel_softmax(
-                slice_projections, clamped_temp
-            )  # [Batch, N_heads, N_tokens, Slice_num]
-
-        else:
-            clamped_temp = torch.clamp(self.temperature, min=0.5, max=5).to(
-                slice_projections.dtype
-            )
-            slice_weights = nn.functional.softmax(
-                slice_projections / clamped_temp, dim=-1
-            )  # [Batch, N_heads, N_tokens, Slice_num]
-
-        # Cast to the computation type (since the parameter is probably fp32)
-        slice_weights = slice_weights.to(slice_projections.dtype)
-
-        # This does the projection of the latent space fx by the weights:
-
-        # Computing the slice tokens is a matmul followed by a normalization.
-        # It can, unfortunately, overflow in reduced precision, so normalize first:
-        slice_norm = slice_weights.sum(2)  # [Batch, N_heads, Slice_num]
-        normed_weights = slice_weights / (slice_norm[:, :, None, :] + 1e-2)
-        slice_token = torch.matmul(normed_weights.transpose(2, 3), fx)
-
-        # Return the original weights, not the normed weights:
-        return slice_weights, slice_token
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        r"""Reduced forward pass projecting inputs to physical state slices.
-
-        This performs a partial physics attention operation: it projects the input onto
-        learned physical state slices but does not project back to the original space.
-
-        Parameters
-        ----------
-        x : torch.Tensor
-            Input tensor of shape \((B, N, C)\) where \(B\) is batch size, \(N\) is number of tokens, and \(C\) is number of channels.
-
-        Returns
-        -------
-        torch.Tensor
-            Slice tokens of shape \((B, H, S, D)\) where \(H\) is number of heads, \(S\) is number of slices, and \(D\) is head dimension.
-        """
-
-        # All of this is derived from the PhysicsAttention Layer
-
-        # Project the inputs onto learned spaces:
-        if self.plus:
-            x_mid = self.project_input_onto_slices(x)
-            # In transolver ++, fx_mid is gone.
-            # x_mid is used to compute the projections instead:
-            fx_mid = x_mid
-        else:
-            x_mid, fx_mid = self.project_input_onto_slices(x)
-
-        # Perform the linear projection of learned latent space onto slices:
-        slice_projections = self.in_project_slice(x_mid)
-
-        # Slice projections has shape [B, N_head, N_tokens, Head_dim], but head_dim may have changed!
-
-        # Use the slice projections and learned spaces to compute the slices, and their weights:
-        _, slice_tokens = self.compute_slices_from_projections(
-            slice_projections, fx_mid
-        )
-        # _ has shape [Batch, N_heads, N_tokens, Slice_num]
-        # slice_tokens has shape  [Batch, N_heads, N_tokens, head_dim]
-
-        return slice_tokens
-
 def _normalize_dim(x):
     # Accept int as scalar
     if isinstance(x, int):
@@ -763,82 +235,26 @@ def __init__(
         functional_dims = _normalize_dim(functional_dim)
         out_dims = _normalize_dim(out_dim)
 
-        # These are to project geometry embeddings and global embeddings onto
-        # a physical state space:
-        context_dim = 0
-        if geometry_dim is not None and self.include_local_features:
-            self.radii = radii
-            self.neighbors_in_radius = neighbors_in_radius
-
-            self.bq_warp = nn.ModuleList()
-            self.geo_conv_in = nn.ModuleList()
-            self.geo_conv_out = nn.ModuleList()
-            self.geometry_features_tokenizer = nn.ModuleList()
-
-            for i in range(len(functional_dims)):
-                self.bq_warp_list = nn.ModuleList()
-                self.geo_conv_in_list = nn.ModuleList()
-                self.geo_conv_out_list = nn.ModuleList()
-                self.geometry_features_tokenizer_list = nn.ModuleList()
-
-                for h in range(len(self.radii)):
-                    self.bq_warp_list.append(BQWarp(
-                        radius=radii[h],
-                        neighbors_in_radius=neighbors_in_radius[h],
-                    ))
-
-                    self.geo_conv_in_list.append(
-                        Mlp(
-                            in_features=geometry_dim * neighbors_in_radius[h],
-                            hidden_features=[n_hidden_local, n_hidden_local // 2],
-                            out_features=n_hidden_local,
-                            act_layer=nn.GELU,
-                            drop=0.0,
-                        )
-                    )
-
-                    self.geo_conv_out_list.append(
-                        Mlp(
-                            in_features=geometry_dim * neighbors_in_radius[h],
-                            hidden_features=[n_hidden_local, n_hidden_local // 2],
-                            out_features=n_hidden_local,
-                            act_layer=nn.GELU,
-                            drop=0.0,
-                        )
-                    )
-                    
-                    self.geometry_features_tokenizer_list.append(ContextProjector(
-                        n_hidden_local,
-                        n_head,
-                        n_hidden // n_head,
-                        dropout,
-                        slice_num,
-                        use_te,
-                        plus,
-                    ))
-                    context_dim += n_hidden // n_head
-
-                self.bq_warp.append(nn.ModuleList(self.bq_warp_list))
-                self.geo_conv_in.append(nn.ModuleList(self.geo_conv_in_list))
-                self.geo_conv_out.append(nn.ModuleList(self.geo_conv_out_list))
-                self.geometry_features_tokenizer.append(nn.ModuleList(self.geometry_features_tokenizer_list))
-
-        if geometry_dim is not None:
-            self.geometry_tokenizer = ContextProjector(
-                geometry_dim,
-                n_head,
-                n_hidden // n_head,
-                dropout,
-                slice_num,
-                use_te,
-                plus,
-            )
-            context_dim += n_hidden // n_head
-        if global_dim is not None:
-            self.global_tokenizer = ContextProjector(
-                global_dim, n_head, n_hidden // n_head, dropout, slice_num, use_te, plus
-            )
-            context_dim += n_hidden // n_head
+        # Store radii for hidden dimension calculation
+        self.radii = radii if self.include_local_features else []
+
+        # Initialize the context builder - handles all context construction
+        self.context_builder = GlobalContextBuilder(
+            functional_dims=functional_dims,
+            geometry_dim=geometry_dim,
+            global_dim=global_dim,
+            radii=radii,
+            neighbors_in_radius=neighbors_in_radius,
+            n_hidden_local=n_hidden_local,
+            n_hidden=n_hidden,
+            n_head=n_head,
+            dropout=dropout,
+            slice_num=slice_num,
+            use_te=use_te,
+            plus=plus,
+            include_local_features=self.include_local_features,
+        )
+        context_dim = self.context_builder.get_context_dim()
       
         if len(functional_dims) != len(out_dims):
             raise ValueError(
@@ -913,6 +329,7 @@ def __init__(
     def forward(
         self,
         local_embedding: torch.Tensor | tuple[torch.Tensor, ...],
+        local_positions: torch.Tensor | tuple[torch.Tensor, ...] | None = None,
         global_embedding: torch.Tensor | None = None,
         geometry: torch.Tensor | None = None,
         time: torch.Tensor | None = None,
@@ -934,52 +351,19 @@ def forward(
                                       "Error rather than silently ignoring it.")
 
         local_embedding = _normalize_tensor(local_embedding)
-        # First, construct the global context vectors:
-        global_context_input = []
-        if geometry is not None:
-            if self.include_local_features:
-                for i in range(len(local_embedding)):
-                    for h in range(len(self.radii)):
-                        mapping, k_short = self.bq_warp[i][h](local_embedding[i][:, :, :3], geometry)
-                        b, n_points, n_neighbors, c = k_short.shape
-                        k_short_reshaped = rearrange(
-                            k_short, "b x y z -> b x (y z)", x=n_points, y=n_neighbors, z=c
-                        )
-                        geometry_features = F.tanh(self.geo_conv_in[i][h](k_short_reshaped))
-                        geometry_states = self.geometry_features_tokenizer[i][h](geometry_features)
-                        global_context_input.append(geometry_states)
-            geometry_states = self.geometry_tokenizer(geometry)
-            global_context_input.append(geometry_states)
+        if local_positions is not None:
+            local_positions = _normalize_tensor(local_positions)
         
-        if global_embedding is not None:
-            global_states = self.global_tokenizer(global_embedding)
-            global_context_input.append(global_states)
-
-        # Construct the embedding states:
-        if len(global_context_input) > 0:
-            embedding_states = torch.cat(global_context_input, dim=-1)
-        else:
-            embedding_states = None
-
-
-        if self.include_local_features and geometry is not None:
-            local_embedding_bq = []
-            for i in range(len(local_embedding)):
-                local_embedding_list_radii = []
-                for h in range(len(self.radii)):
-                    mapping, k_short = self.bq_warp[i][h](geometry, local_embedding[i][:, :, :3])
-                    b, n_points, n_neighbors, c = k_short.shape
-                    k_short_reshaped = rearrange(
-                        k_short, "b x y z -> b x (y z)", x=n_points, y=n_neighbors, z=c
-                    )
-                    local_features = F.tanh(self.geo_conv_out[i][h](k_short_reshaped))
-                    local_embedding_list_radii.append(local_features)
-                local_embedding_bq.append(torch.cat(local_embedding_list_radii, dim=-1))
+        # Build context and extract local features using the context builder
+        embedding_states, local_embedding_bq = self.context_builder.build_context(
+            local_embedding, local_positions, geometry, global_embedding
+        )
 
-        # Project the inputs to the hidden dimension:
-        x = [ self.preprocess[i](le) for i, le in enumerate(local_embedding) ]
+        # Project inputs to hidden dimension
+        x = [self.preprocess[i](le) for i, le in enumerate(local_embedding)]
 
-        if self.include_local_features:
+        # Concatenate local features if enabled
+        if self.include_local_features and local_embedding_bq is not None:
             x = [torch.cat([x[i], local_embedding_bq[i]], dim=-1) for i in range(len(x))]
 
         for block in self.blocks:
diff --git a/test/models/typhon/test_gale.py b/test/models/typhon/test_gale.py
index 4ecc2ff587..1c88dc0ecd 100644
--- a/test/models/typhon/test_gale.py
+++ b/test/models/typhon/test_gale.py
@@ -17,7 +17,7 @@
 import pytest
 import torch
 
-from physicsnemo.experimental.models.typhon.typhon import (
+from physicsnemo.experimental.models.typhon.gale import (
     GALE,
     GALE_block,
 )
diff --git a/test/models/typhon/test_typhon.py b/test/models/typhon/test_typhon.py
index 5b119b2f72..3a4b09e5b7 100644
--- a/test/models/typhon/test_typhon.py
+++ b/test/models/typhon/test_typhon.py
@@ -78,7 +78,7 @@ def test_typhon_forward(device, use_geometry, use_global):
     ).to(device)
 
     local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
-
+    local_positions = local_emb[:, :, :3]
     kwargs = {}
     if use_geometry:
         kwargs["geometry"] = torch.randn(batch_size, n_geom_tokens, geometry_dim).to(
@@ -89,7 +89,7 @@ def test_typhon_forward(device, use_geometry, use_global):
             device
         )
 
-    outputs = model(local_emb, **kwargs)
+    outputs = model(local_emb, local_positions, **kwargs)
 
     assert isinstance(outputs, torch.Tensor)
     assert outputs.shape == (batch_size, n_tokens, 4)
@@ -130,11 +130,14 @@ def test_typhon_forward_tuple_inputs(device):
 
     local_emb_1 = torch.randn(batch_size, n_tokens_1, functional_dims[0]).to(device)
     local_emb_2 = torch.randn(batch_size, n_tokens_2, functional_dims[1]).to(device)
+    local_positions_1 = local_emb_1[:, :, :3]
+    local_positions_2 = local_emb_2[:, :, :3]
     geometry = torch.randn(batch_size, n_geom, 3).to(device)
     global_emb = torch.randn(batch_size, n_global, 16).to(device)
 
     outputs = model(
         (local_emb_1, local_emb_2),
+        local_positions=(local_positions_1, local_positions_2),
         global_embedding=global_emb,
         geometry=geometry,
     )
@@ -181,10 +184,16 @@ def test_typhon_forward_with_local_features(device, pytestconfig):
 
     # For local features, the first 3 channels of local_emb should be coordinates
     local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
+    local_positions = local_emb[:, :, :3]
     geometry = torch.randn(batch_size, n_geom, 3).to(device)
     global_emb = torch.randn(batch_size, n_global, 16).to(device)
 
-    outputs = model(local_emb, global_embedding=global_emb, geometry=geometry)
+    outputs = model(
+        local_emb,
+        local_positions=local_positions,
+        global_embedding=global_emb,
+        geometry=geometry,
+    )
 
     assert isinstance(outputs, torch.Tensor)
     assert outputs.shape == (batch_size, n_tokens, 4)
@@ -225,12 +234,13 @@ def test_typhon_forward_accuracy_basic(device):
     n_global = 5
 
     local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
+    local_positions = local_emb[:, :, :3]
     geometry = torch.randn(batch_size, n_geom, 3).to(device)
     global_emb = torch.randn(batch_size, n_global, 16).to(device)
 
     assert validate_forward_accuracy(
         model,
-        (local_emb, global_emb, geometry),
+        (local_emb, local_positions, global_emb, geometry),
         file_name="typhon_basic_output.pth",
         atol=1e-3,
     )
@@ -270,12 +280,20 @@ def test_typhon_forward_accuracy_tuple(device):
 
     local_emb_1 = torch.randn(batch_size, n_tokens_1, functional_dims[0]).to(device)
     local_emb_2 = torch.randn(batch_size, n_tokens_2, functional_dims[1]).to(device)
+
+    local_positions_1 = local_emb_1[:, :, :3]
+    local_positions_2 = local_emb_2[:, :, :3]
     geometry = torch.randn(batch_size, n_geom, 3).to(device)
     global_emb = torch.randn(batch_size, n_global, 16).to(device)
 
     assert validate_forward_accuracy(
         model,
-        ((local_emb_1, local_emb_2), global_emb, geometry),
+        (
+            (local_emb_1, local_emb_2),
+            (local_positions_1, local_positions_2),
+            global_emb,
+            geometry,
+        ),
         file_name="typhon_tuple_output.pth",
         atol=1e-3,
     )
@@ -317,35 +335,36 @@ def setup_model():
         local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
         geometry = torch.randn(batch_size, n_tokens, 3).to(device)
         global_emb = torch.randn(batch_size, n_global, 16).to(device)
-
-        return model, local_emb, global_emb, geometry
+        local_positions = local_emb[:, :, :3]
+        return model, local_emb, local_positions, global_emb, geometry
 
     # Check CUDA graphs
-    model, local_emb, global_emb, geometry = setup_model()
+    model, local_emb, local_positions, global_emb, geometry = setup_model()
+
     assert validate_cuda_graphs(
         model,
-        (local_emb, global_emb, geometry),
+        (local_emb, local_positions, global_emb, geometry),
     )
 
     # Check JIT
-    model, local_emb, global_emb, geometry = setup_model()
+    model, local_emb, local_positions, global_emb, geometry = setup_model()
     assert validate_jit(
         model,
-        (local_emb, global_emb, geometry),
+        (local_emb, local_positions, global_emb, geometry),
     )
 
     # Check AMP
-    model, local_emb, global_emb, geometry = setup_model()
+    model, local_emb, local_positions, global_emb, geometry = setup_model()
     assert validate_amp(
         model,
-        (local_emb, global_emb, geometry),
+        (local_emb, local_positions, global_emb, geometry),
     )
 
     # Check Combo
-    model, local_emb, global_emb, geometry = setup_model()
+    model, local_emb, local_positions, global_emb, geometry = setup_model()
     assert validate_combo_optims(
         model,
-        (local_emb, global_emb, geometry),
+        (local_emb, local_positions, global_emb, geometry),
     )
 
 
@@ -386,8 +405,14 @@ def test_typhon_te_basic(device, pytestconfig):
     local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
     geometry = torch.randn(batch_size, n_geom, 3).to(device)
     global_emb = torch.randn(batch_size, n_global, 16).to(device)
+    local_positions = local_emb[:, :, :3]
 
-    outputs = model(local_emb, global_embedding=global_emb, geometry=geometry)
+    outputs = model(
+        local_emb,
+        local_positions=local_positions,
+        global_embedding=global_emb,
+        geometry=geometry,
+    )
 
     assert isinstance(outputs, torch.Tensor)
     assert outputs.shape == (batch_size, n_tokens, 4)
@@ -447,11 +472,11 @@ def test_typhon_checkpoint(device):
     local_emb = torch.randn(batch_size, n_tokens, 32).to(device)
     geometry = torch.randn(batch_size, n_tokens, 3).to(device)
     global_emb = torch.randn(batch_size, n_global, 16).to(device)
-
+    local_positions = local_emb[:, :, :3]
     assert validate_checkpoint(
         model_1,
         model_2,
-        (local_emb, global_emb, geometry),
+        (local_emb, local_positions, global_emb, geometry),
     )
 
 
@@ -512,7 +537,7 @@ def test_typhon_checkpoint_tuple(device):
     assert validate_checkpoint(
         model_1,
         model_2,
-        ((local_emb_1, local_emb_2), global_emb, geometry),
+        ((local_emb_1, local_emb_2), (None, None), global_emb, geometry),
     )
 
 
@@ -583,7 +608,9 @@ def test_typhon_activations(device, activation):
     geometry = torch.randn(batch_size, n_geom, 3).to(device)
     global_emb = torch.randn(batch_size, n_global, 16).to(device)
 
-    outputs = model(local_emb, global_embedding=global_emb, geometry=geometry)
+    outputs = model(
+        local_emb, local_positions=None, global_embedding=global_emb, geometry=geometry
+    )
 
     assert isinstance(outputs, torch.Tensor)
     assert outputs.shape == (batch_size, n_tokens, 4)
@@ -628,7 +655,9 @@ def test_typhon_different_depths(device, n_layers):
     geometry = torch.randn(batch_size, n_geom, 3).to(device)
     global_emb = torch.randn(batch_size, n_global, 16).to(device)
 
-    outputs = model(local_emb, global_embedding=global_emb, geometry=geometry)
+    outputs = model(
+        local_emb, local_positions=None, global_embedding=global_emb, geometry=geometry
+    )
 
     assert isinstance(outputs, torch.Tensor)
     assert outputs.shape == (batch_size, n_tokens, 4)
@@ -668,7 +697,9 @@ def test_typhon_different_slice_nums(device, slice_num):
     geometry = torch.randn(batch_size, n_geom, 3).to(device)
     global_emb = torch.randn(batch_size, n_global, 16).to(device)
 
-    outputs = model(local_emb, global_embedding=global_emb, geometry=geometry)
+    outputs = model(
+        local_emb, local_positions=None, global_embedding=global_emb, geometry=geometry
+    )
 
     assert isinstance(outputs, torch.Tensor)
     assert outputs.shape == (batch_size, n_tokens, 4)
@@ -708,7 +739,9 @@ def test_typhon_different_hidden_sizes(device, n_hidden, n_head):
     geometry = torch.randn(batch_size, n_geom, 3).to(device)
     global_emb = torch.randn(batch_size, n_global, 16).to(device)
 
-    outputs = model(local_emb, global_embedding=global_emb, geometry=geometry)
+    outputs = model(
+        local_emb, local_positions=None, global_embedding=global_emb, geometry=geometry
+    )
 
     assert isinstance(outputs, torch.Tensor)
     assert outputs.shape == (batch_size, n_tokens, 4)

From 828ed7d9dddcf0205a543c1473a6bca9d951933f Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 8 Dec 2025 08:07:06 -0800
Subject: [PATCH 19/32] Snapshot before integrating BQ

---
 .../src/conf/transolver_surface.yaml               |  4 ++--
 .../src/conf/transolver_volume.yaml                |  4 ++--
 .../src/conf/typhon_surface.yaml                   |  4 ++--
 .../transformer_models/src/conf/typhon_volume.yaml |  6 +++---
 .../transformer_models/src/train.py                | 14 +++++++-------
 5 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_surface.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_surface.yaml
index e45951a310..1da49c92a0 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_surface.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_surface.yaml
@@ -17,7 +17,7 @@
 defaults:
   - training: base
   - model: transolver
-  - datapipe: surface
+  - data: surface
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
@@ -28,7 +28,7 @@ precision: float32 # float32, float16, bfloat16, or float8
 compile: true
 profile: false
 
-datapipe:
+data:
   include_sdf: false
 
 # Logging configuration
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_volume.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_volume.yaml
index 3cc0ba5c27..bab0b9aedf 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_volume.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_volume.yaml
@@ -17,14 +17,14 @@
 defaults:
   - training: base
   - model: transolver
-  - datapipe: volume
+  - data: volume
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
 run_id: "volume/bfloat16"
 
 # Performance considerations:
-precision: bfloat16 # float32, float16, bfloat16, or float8
+precision: float32 # float32, float16, bfloat16, or float8
 compile: true
 profile: false
 
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_surface.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_surface.yaml
index 123d05f639..de1866126a 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_surface.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_surface.yaml
@@ -17,7 +17,7 @@
 defaults:
   - training: base
   - model: typhon
-  - datapipe: surface
+  - data: surface
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
@@ -35,7 +35,7 @@ model:
   neighbors_in_radius: [4, 8, 16, 64, 128, 256]  # neighbors in radius for local features
   n_hidden_local: 32 # hidden dimension for local features
 
-datapipe:
+data:
   include_sdf: false
   include_geometry: true
   geometry_sampling: 300_000
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_volume.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_volume.yaml
index 1e48580b6f..8c3152cd0e 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_volume.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_volume.yaml
@@ -17,7 +17,7 @@
 defaults:
   - training: base
   - model: typhon
-  - datapipe: volume
+  - data: volume
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
@@ -25,10 +25,10 @@ run_id: "typhon/volume/bq"
 
 # Performance considerations:
 precision: float32 # float32, float16, bfloat16, or float8
-compile: false
+compile: true 
 profile: false
 
-datapipe:
+data:
   include_geometry: true
   geometry_sampling: 300_000
   broadcast_global_features: false
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/train.py b/examples/cfd/external_aerodynamics/transformer_models/src/train.py
index 823c31f0e1..59e84e4bfa 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/train.py
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/train.py
@@ -450,7 +450,7 @@ def train_epoch(
             precision,
             output_pad_size,
             dist_manager,
-            cfg.datapipe.mode,
+            cfg.data.mode,
             dataloader,
         )
 
@@ -566,7 +566,7 @@ def val_epoch(
                 precision,
                 output_pad_size,
                 dist_manager,
-                cfg.datapipe.mode,
+                cfg.data.mode,
                 dataloader,
             )
 
@@ -714,8 +714,8 @@ def main(cfg: DictConfig):
     logger.info(f"Number of parameters: {num_params}")
 
     # Load the normalization file from configured directory (defaults to current dir)
-    norm_dir = getattr(cfg.datapipe, "normalization_dir", ".")
-    if cfg.datapipe.mode == "surface" or cfg.datapipe.mode == "combined":
+    norm_dir = getattr(cfg.data, "normalization_dir", ".")
+    if cfg.data.mode == "surface" or cfg.data.mode == "combined":
         norm_file = str(Path(norm_dir) / "surface_fields_normalization.npz")
         norm_data = np.load(norm_file)
         surface_factors = {
@@ -725,7 +725,7 @@ def main(cfg: DictConfig):
     else:
         surface_factors = None
 
-    if cfg.datapipe.mode == "volume" or cfg.datapipe.mode == "combined":
+    if cfg.data.mode == "volume" or cfg.data.mode == "combined":
         norm_file = str(Path(norm_dir) / "volume_fields_normalization.npz")
         norm_data = np.load(norm_file)
         volume_factors = {
@@ -737,7 +737,7 @@ def main(cfg: DictConfig):
 
     # Training dataset
     train_dataloader = create_transolver_dataset(
-        cfg.datapipe,
+        cfg.data,
         phase="train",
         surface_factors=surface_factors,
         volume_factors=volume_factors,
@@ -746,7 +746,7 @@ def main(cfg: DictConfig):
     # Validation dataset
 
     val_dataloader = create_transolver_dataset(
-        cfg.datapipe,
+        cfg.data,
         phase="val",
         surface_factors=surface_factors,
         volume_factors=volume_factors,

From fc2e56d2c3edbe92ec4f9d515a0c73caa8daa57d Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 10 Dec 2025 16:57:15 +0000
Subject: [PATCH 20/32] Fix data dir name for transformer model configs

---
 .../transformer_models/src/conf/{datapipe => data}/core.yaml      | 0
 .../transformer_models/src/conf/{datapipe => data}/surface.yaml   | 0
 .../transformer_models/src/conf/{datapipe => data}/volume.yaml    | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename examples/cfd/external_aerodynamics/transformer_models/src/conf/{datapipe => data}/core.yaml (100%)
 rename examples/cfd/external_aerodynamics/transformer_models/src/conf/{datapipe => data}/surface.yaml (100%)
 rename examples/cfd/external_aerodynamics/transformer_models/src/conf/{datapipe => data}/volume.yaml (100%)

diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/datapipe/core.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/data/core.yaml
similarity index 100%
rename from examples/cfd/external_aerodynamics/transformer_models/src/conf/datapipe/core.yaml
rename to examples/cfd/external_aerodynamics/transformer_models/src/conf/data/core.yaml
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/datapipe/surface.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/data/surface.yaml
similarity index 100%
rename from examples/cfd/external_aerodynamics/transformer_models/src/conf/datapipe/surface.yaml
rename to examples/cfd/external_aerodynamics/transformer_models/src/conf/data/surface.yaml
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/datapipe/volume.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/data/volume.yaml
similarity index 100%
rename from examples/cfd/external_aerodynamics/transformer_models/src/conf/datapipe/volume.yaml
rename to examples/cfd/external_aerodynamics/transformer_models/src/conf/data/volume.yaml

From da6b7108e1b8146ba376f2da86049d9d9f0211d8 Mon Sep 17 00:00:00 2001
From: root <root@nvl72092-T03.cm.cluster>
Date: Wed, 10 Dec 2025 09:13:52 -0800
Subject: [PATCH 21/32] Revert config group name to datapipe; point data paths at drivaer_aws

---
 .../src/compute_normalizations.py                  | 10 +++++-----
 .../transformer_models/src/conf/data/core.yaml     |  4 ++--
 .../src/conf/transolver_surface.yaml               |  2 +-
 .../src/conf/transolver_volume.yaml                |  2 +-
 .../src/conf/typhon_surface.yaml                   |  2 +-
 .../transformer_models/src/conf/typhon_volume.yaml |  2 +-
 .../transformer_models/src/train.py                | 14 +++++++-------
 7 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/compute_normalizations.py b/examples/cfd/external_aerodynamics/transformer_models/src/compute_normalizations.py
index 749a7ab2f7..c75fdc9af7 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/compute_normalizations.py
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/compute_normalizations.py
@@ -113,10 +113,10 @@ def main(cfg: DictConfig) -> None:
     """
 
     # Choose which field to normalize (can be overridden via command line)
-    field_key: str = cfg.data.mode + "_fields"
+    field_key: str = cfg.datapipe.mode + "_fields"
 
     # Normalization directory can be configured (backward compatible: defaults to current directory)
-    normalization_dir: str = getattr(cfg.data, "normalization_dir", ".")
+    normalization_dir: str = getattr(cfg.datapipe, "normalization_dir", ".")
 
     # Construct full path using pathlib (cross-platform, concise)
     workspace_path: str = str(
@@ -127,14 +127,14 @@ def main(cfg: DictConfig) -> None:
 
     # Create the dataset using configuration parameters
     dataset = CAEDataset(
-        data_dir=cfg.data.train.data_path,
+        data_dir=cfg.datapipe.train.data_path,
         keys_to_read=[
             field_key,
         ],
         keys_to_read_if_available={},
         output_device=device,
-        preload_depth=cfg.data.preload_depth,
-        pin_memory=cfg.data.pin_memory,
+        preload_depth=cfg.datapipe.preload_depth,
+        pin_memory=cfg.datapipe.pin_memory,
     )
     # Compute normalization statistics
     mean, std, min_val, max_val = compute_mean_std_min_max(dataset, field_key, 100)
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/data/core.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/data/core.yaml
index 8521bce1a5..56e1f4e601 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/data/core.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/data/core.yaml
@@ -16,9 +16,9 @@
 
 # Paths to your data:
 train:
-  data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/shift_wing/typhon/train/
+  data_path:  /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/train/
 val:
-  data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/shift_wing/typhon/val/
+  data_path: /lustre/fsw/portfolios/coreai/projects/coreai_modulus_cae/datasets/drivaer_aws/domino/val/
 
 # You can set a normalization factor directory:
 normalization_dir: "src/"
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_surface.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_surface.yaml
index 1da49c92a0..a728c64741 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_surface.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_surface.yaml
@@ -17,7 +17,7 @@
 defaults:
   - training: base
   - model: transolver
-  - data: surface
+  - datapipe: surface
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_volume.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_volume.yaml
index bab0b9aedf..66a432192e 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_volume.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_volume.yaml
@@ -17,7 +17,7 @@
 defaults:
   - training: base
   - model: transolver
-  - data: volume
+  - datapipe: volume
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_surface.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_surface.yaml
index de1866126a..82e6ed06d7 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_surface.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_surface.yaml
@@ -17,7 +17,7 @@
 defaults:
   - training: base
   - model: typhon
-  - data: surface
+  - datapipe: surface
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_volume.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_volume.yaml
index 8c3152cd0e..06063105a7 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_volume.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_volume.yaml
@@ -17,7 +17,7 @@
 defaults:
   - training: base
   - model: typhon
-  - data: volume
+  - datapipe: volume
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/train.py b/examples/cfd/external_aerodynamics/transformer_models/src/train.py
index 59e84e4bfa..823c31f0e1 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/train.py
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/train.py
@@ -450,7 +450,7 @@ def train_epoch(
             precision,
             output_pad_size,
             dist_manager,
-            cfg.data.mode,
+            cfg.datapipe.mode,
             dataloader,
         )
 
@@ -566,7 +566,7 @@ def val_epoch(
                 precision,
                 output_pad_size,
                 dist_manager,
-                cfg.data.mode,
+                cfg.datapipe.mode,
                 dataloader,
             )
 
@@ -714,8 +714,8 @@ def main(cfg: DictConfig):
     logger.info(f"Number of parameters: {num_params}")
 
     # Load the normalization file from configured directory (defaults to current dir)
-    norm_dir = getattr(cfg.data, "normalization_dir", ".")
-    if cfg.data.mode == "surface" or cfg.data.mode == "combined":
+    norm_dir = getattr(cfg.datapipe, "normalization_dir", ".")
+    if cfg.datapipe.mode == "surface" or cfg.datapipe.mode == "combined":
         norm_file = str(Path(norm_dir) / "surface_fields_normalization.npz")
         norm_data = np.load(norm_file)
         surface_factors = {
@@ -725,7 +725,7 @@ def main(cfg: DictConfig):
     else:
         surface_factors = None
 
-    if cfg.data.mode == "volume" or cfg.data.mode == "combined":
+    if cfg.datapipe.mode == "volume" or cfg.datapipe.mode == "combined":
         norm_file = str(Path(norm_dir) / "volume_fields_normalization.npz")
         norm_data = np.load(norm_file)
         volume_factors = {
@@ -737,7 +737,7 @@ def main(cfg: DictConfig):
 
     # Training dataset
     train_dataloader = create_transolver_dataset(
-        cfg.data,
+        cfg.datapipe,
         phase="train",
         surface_factors=surface_factors,
         volume_factors=volume_factors,
@@ -746,7 +746,7 @@ def main(cfg: DictConfig):
     # Validation dataset
 
     val_dataloader = create_transolver_dataset(
-        cfg.data,
+        cfg.datapipe,
         phase="val",
         surface_factors=surface_factors,
         volume_factors=volume_factors,

From e08f243ee2033b48114193f7a6e69433f5c03b25 Mon Sep 17 00:00:00 2001
From: Rishi Ranade <rranade@oci-hsg-cs-001-login-01.cm.cluster>
Date: Wed, 10 Dec 2025 09:36:07 -0800
Subject: [PATCH 22/32] Restore data config group; pass local_positions to model; fix loss logging

---
 .../src/compute_normalizations.py             | 10 ++++----
 .../src/conf/transolver_surface.yaml          |  2 +-
 .../src/conf/transolver_volume.yaml           |  2 +-
 .../src/conf/typhon_surface.yaml              |  4 ++--
 .../src/conf/typhon_volume.yaml               |  5 ++--
 .../transformer_models/src/train.py           | 23 ++++++++++---------
 .../models/typhon/context_projector.py        |  4 ++--
 7 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/compute_normalizations.py b/examples/cfd/external_aerodynamics/transformer_models/src/compute_normalizations.py
index c75fdc9af7..749a7ab2f7 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/compute_normalizations.py
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/compute_normalizations.py
@@ -113,10 +113,10 @@ def main(cfg: DictConfig) -> None:
     """
 
     # Choose which field to normalize (can be overridden via command line)
-    field_key: str = cfg.datapipe.mode + "_fields"
+    field_key: str = cfg.data.mode + "_fields"
 
     # Normalization directory can be configured (backward compatible: defaults to current directory)
-    normalization_dir: str = getattr(cfg.datapipe, "normalization_dir", ".")
+    normalization_dir: str = getattr(cfg.data, "normalization_dir", ".")
 
     # Construct full path using pathlib (cross-platform, concise)
     workspace_path: str = str(
@@ -127,14 +127,14 @@ def main(cfg: DictConfig) -> None:
 
     # Create the dataset using configuration parameters
     dataset = CAEDataset(
-        data_dir=cfg.datapipe.train.data_path,
+        data_dir=cfg.data.train.data_path,
         keys_to_read=[
             field_key,
         ],
         keys_to_read_if_available={},
         output_device=device,
-        preload_depth=cfg.datapipe.preload_depth,
-        pin_memory=cfg.datapipe.pin_memory,
+        preload_depth=cfg.data.preload_depth,
+        pin_memory=cfg.data.pin_memory,
     )
     # Compute normalization statistics
     mean, std, min_val, max_val = compute_mean_std_min_max(dataset, field_key, 100)
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_surface.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_surface.yaml
index a728c64741..1da49c92a0 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_surface.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_surface.yaml
@@ -17,7 +17,7 @@
 defaults:
   - training: base
   - model: transolver
-  - datapipe: surface
+  - data: surface
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_volume.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_volume.yaml
index 66a432192e..bab0b9aedf 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_volume.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_volume.yaml
@@ -17,7 +17,7 @@
 defaults:
   - training: base
   - model: transolver
-  - datapipe: volume
+  - data: volume
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_surface.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_surface.yaml
index 82e6ed06d7..5d5955c828 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_surface.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_surface.yaml
@@ -17,7 +17,7 @@
 defaults:
   - training: base
   - model: typhon
-  - datapipe: surface
+  - data: surface
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
@@ -25,7 +25,7 @@ run_id: "typhon/surface/bq"
 
 # Performance considerations:
 precision: float32 # float32, float16, bfloat16, or float8
-compile: false
+compile: true
 profile: false
 
 model:
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_volume.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_volume.yaml
index 06063105a7..d2e0189bf8 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_volume.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_volume.yaml
@@ -17,7 +17,7 @@
 defaults:
   - training: base
   - model: typhon
-  - datapipe: volume
+  - data: volume
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
@@ -32,11 +32,12 @@ data:
   include_geometry: true
   geometry_sampling: 300_000
   broadcast_global_features: false
+  volume_sample_from_disk: true
 
 
 model:
   functional_dim: 7
-  out_dim: 4
+  out_dim: 5
   include_local_features: true # use local features
   radii: [0.01, 0.05, 0.25, 1.0, 2.5, 5.0] # radius for local features
   neighbors_in_radius: [4, 8, 16, 64, 128, 256]  # neighbors in radius for local features
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/train.py b/examples/cfd/external_aerodynamics/transformer_models/src/train.py
index 823c31f0e1..90379c76f2 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/train.py
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/train.py
@@ -307,9 +307,10 @@ def forward_pass(
             features, geometry = pad_input_for_fp8(features, embeddings, geometry)
 
         if "geometry" in batch.keys():
+            local_positions = embeddings[:, :, :3]
             # This is the Typhon path
             outputs = model(
-                global_embedding=features, local_embedding=embeddings, geometry=geometry
+                global_embedding=features, local_embedding=embeddings, geometry=geometry, local_positions=local_positions
             )
 
             outputs = unpad_output_for_fp8(outputs, output_pad_size)
@@ -317,9 +318,9 @@ def forward_pass(
             loss = loss_fn(outputs, targets)
             # Log them too:
             for i, mode in enumerate(modes):
-                all_metrics[f"loss/{mode}"] = loss[i]
+                all_metrics[f"loss/{mode}"] = loss.item()
             # Averaging over point cloud inputs, instead of summing.
-            full_loss = torch.mean(torch.stack(loss))
+            full_loss = torch.mean(loss)
 
         else:
             # This is the Transolver path
@@ -450,7 +451,7 @@ def train_epoch(
             precision,
             output_pad_size,
             dist_manager,
-            cfg.datapipe.mode,
+            cfg.data.mode,
             dataloader,
         )
 
@@ -476,7 +477,7 @@ def train_epoch(
             total_metrics = metrics
         else:
             total_metrics = {
-                k: total_metrics[k] + metrics[k].item() for k in metrics.keys()
+                k: total_metrics[k] + metrics[k] for k in metrics.keys()
             }
 
         duration = end_time - start_time
@@ -566,7 +567,7 @@ def val_epoch(
                 precision,
                 output_pad_size,
                 dist_manager,
-                cfg.datapipe.mode,
+                cfg.data.mode,
                 dataloader,
             )
 
@@ -714,8 +715,8 @@ def main(cfg: DictConfig):
     logger.info(f"Number of parameters: {num_params}")
 
     # Load the normalization file from configured directory (defaults to current dir)
-    norm_dir = getattr(cfg.datapipe, "normalization_dir", ".")
-    if cfg.datapipe.mode == "surface" or cfg.datapipe.mode == "combined":
+    norm_dir = getattr(cfg.data, "normalization_dir", ".")
+    if cfg.data.mode == "surface" or cfg.data.mode == "combined":
         norm_file = str(Path(norm_dir) / "surface_fields_normalization.npz")
         norm_data = np.load(norm_file)
         surface_factors = {
@@ -725,7 +726,7 @@ def main(cfg: DictConfig):
     else:
         surface_factors = None
 
-    if cfg.datapipe.mode == "volume" or cfg.datapipe.mode == "combined":
+    if cfg.data.mode == "volume" or cfg.data.mode == "combined":
         norm_file = str(Path(norm_dir) / "volume_fields_normalization.npz")
         norm_data = np.load(norm_file)
         volume_factors = {
@@ -737,7 +738,7 @@ def main(cfg: DictConfig):
 
     # Training dataset
     train_dataloader = create_transolver_dataset(
-        cfg.datapipe,
+        cfg.data,
         phase="train",
         surface_factors=surface_factors,
         volume_factors=volume_factors,
@@ -746,7 +747,7 @@ def main(cfg: DictConfig):
     # Validation dataset
 
     val_dataloader = create_transolver_dataset(
-        cfg.datapipe,
+        cfg.data,
         phase="val",
         surface_factors=surface_factors,
         volume_factors=volume_factors,
diff --git a/physicsnemo/experimental/models/typhon/context_projector.py b/physicsnemo/experimental/models/typhon/context_projector.py
index 26a4ce5ae3..9abdfaf19d 100644
--- a/physicsnemo/experimental/models/typhon/context_projector.py
+++ b/physicsnemo/experimental/models/typhon/context_projector.py
@@ -309,8 +309,8 @@ def forward(
         torch.Tensor
             Processed features of shape \((B, N, hidden_dim)\).
         """
-        print(f"query_points shape: {query_points.shape}")
-        print(f"key_features shape: {key_features.shape}")
+        # print(f"query_points shape: {query_points.shape}")
+        # print(f"key_features shape: {key_features.shape}")
         
         _, neighbors = self.bq_warp(query_points, key_features)
         b, n, k, c = neighbors.shape

From 8308a7a8990712c185e855c7520b6bdb0275d17a Mon Sep 17 00:00:00 2001
From: Rishi Ranade <rranade@oci-hsg-cs-001-login-01.cm.cluster>
Date: Wed, 10 Dec 2025 11:25:01 -0800
Subject: [PATCH 23/32] fixing bug in val_epoch

---
 .../cfd/external_aerodynamics/transformer_models/src/train.py   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/train.py b/examples/cfd/external_aerodynamics/transformer_models/src/train.py
index 90379c76f2..b5af14e686 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/train.py
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/train.py
@@ -575,7 +575,7 @@ def val_epoch(
                 total_metrics = metrics
             else:
                 total_metrics = {
-                    k: total_metrics[k] + metrics[k].item() for k in metrics.keys()
+                    k: total_metrics[k] + metrics[k] for k in metrics.keys()
                 }
 
             # Logging

From 6422a669f4757e27eaf792b2e2d933924dda9b29 Mon Sep 17 00:00:00 2001
From: Rishi Ranade <rranade@oci-hsg-cs-001-login-01.cm.cluster>
Date: Thu, 11 Dec 2025 02:02:22 -0800
Subject: [PATCH 24/32] fixing minor bug in inference_on_zarr

---
 .../src/inference_on_zarr.py                  | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/inference_on_zarr.py b/examples/cfd/external_aerodynamics/transformer_models/src/inference_on_zarr.py
index e363c14758..96bd1c388c 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/inference_on_zarr.py
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/inference_on_zarr.py
@@ -32,7 +32,7 @@
 from physicsnemo.utils.logging import PythonLogger, RankZeroLoggingWrapper
 
 from sklearn.metrics import r2_score
-from metrics_new import metrics_fn_surface, metrics_fn_volume
+from metrics import metrics_fn_surface, metrics_fn_volume
 
 from physicsnemo.distributed import DistributedManager
 
@@ -255,8 +255,8 @@ def inference(cfg: DictConfig) -> None:
     logger.info(f"Number of parameters: {num_params}")
 
     # Load the normalization file from configured directory (defaults to current dir)
-    norm_dir = getattr(cfg.datapipe, "normalization_dir", ".")
-    if cfg.datapipe.mode == "surface" or cfg.datapipe.mode == "combined":
+    norm_dir = getattr(cfg.data, "normalization_dir", ".")
+    if cfg.data.mode == "surface" or cfg.data.mode == "combined":
         norm_file = str(Path(norm_dir) / "surface_fields_normalization.npz")
         norm_data = np.load(norm_file)
         surface_factors = {
@@ -266,7 +266,7 @@ def inference(cfg: DictConfig) -> None:
     else:
         surface_factors = None
 
-    if cfg.datapipe.mode == "volume" or cfg.datapipe.mode == "combined":
+    if cfg.data.mode == "volume" or cfg.data.mode == "combined":
         norm_file = str(Path(norm_dir) / "volume_fields_normalization.npz")
         norm_data = np.load(norm_file)
         volume_factors = {
@@ -284,19 +284,19 @@ def inference(cfg: DictConfig) -> None:
     # so there is not downsampling.  We still batch it in the inference script
     # for memory usage constraints.
 
-    batch_resolution = cfg.datapipe.resolution
-    cfg.datapipe.resolution = None
+    batch_resolution = cfg.data.resolution
+    cfg.data.resolution = None
     ## Make sure to read the whole data sample for volume:
-    if cfg.datapipe.mode == "volume":
-        cfg.datapipe.volume_sample_from_disk = False
+    if cfg.data.mode == "volume":
+        cfg.data.volume_sample_from_disk = False
 
     # And we need the mesh features for drag, lift in surface data:
-    if cfg.datapipe.mode == "surface":
-        cfg.datapipe.return_mesh_features = True
+    if cfg.data.mode == "surface":
+        cfg.data.return_mesh_features = True
 
     # Validation dataset
     val_dataset = create_transolver_dataset(
-        cfg.datapipe,
+        cfg.data,
         phase="val",
         surface_factors=surface_factors,
         volume_factors=volume_factors,
@@ -311,7 +311,7 @@ def inference(cfg: DictConfig) -> None:
                     batch,
                     model,
                     cfg.precision,
-                    cfg.datapipe.mode,
+                    cfg.data.mode,
                     batch_resolution,
                     output_pad_size,
                     dist_manager,
@@ -326,7 +326,7 @@ def inference(cfg: DictConfig) -> None:
         air_density = batch["air_density"] if "air_density" in batch.keys() else None
         stream_velocity = batch["stream_velocity"] if "stream_velocity" in batch.keys() else None
 
-        if cfg.datapipe.mode == "surface":
+        if cfg.data.mode == "surface":
             coeff = 1.0
 
             if stream_velocity is not None:
@@ -435,7 +435,7 @@ def inference(cfg: DictConfig) -> None:
                 ]
             )
 
-        elif cfg.datapipe.mode == "volume":
+        elif cfg.data.mode == "volume":
             if stream_velocity is not None:
                 global_predictions[:, :, 3] = global_predictions[:, :, 3] * stream_velocity**2.0 * air_density
                 global_targets[:, :, 3] = global_targets[:, :, 3] * stream_velocity**2.0 * air_density
@@ -510,7 +510,7 @@ def inference(cfg: DictConfig) -> None:
                 ]
             )
 
-    if cfg.datapipe.mode == "surface":
+    if cfg.data.mode == "surface":
         pred_drag_coeffs = [r[8] for r in results]
         pred_lift_coeffs = [r[9] for r in results]
         true_drag_coeffs = [r[10] for r in results]
@@ -547,7 +547,7 @@ def inference(cfg: DictConfig) -> None:
             writer.writerows(results)
         logger.info(f"Results saved to {csv_filename}")
 
-    elif cfg.datapipe.mode == "volume":
+    elif cfg.data.mode == "volume":
         headers = [
             "Batch",
             "Loss",

From 9e0bcc05cd9fbc4b18d8ce258bd85cbca8216764 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 19 Dec 2025 15:21:57 +0000
Subject: [PATCH 25/32] Rename to geotransolver

---
 .../{typhon => geotransolver}/__init__.py     |   4 +-
 .../context_projector.py                      |  13 +-
 .../models/{typhon => geotransolver}/gale.py  |   9 +-
 .../geotransolver.py}                         |  36 ++---
 .../{typhon => geotransolver}/__init__.py     |   0
 .../data/geotransolver_basic_output.pth}      | Bin
 .../data/geotransolver_te_output.pth}         | Bin
 .../data/geotransolver_tuple_output.pth}      | Bin
 .../test_context_projector.py                 |   2 +-
 .../{typhon => geotransolver}/test_gale.py    |   2 +-
 .../test_geotransolver.py}                    | 132 ++++++++----------
 11 files changed, 95 insertions(+), 103 deletions(-)
 rename physicsnemo/experimental/models/{typhon => geotransolver}/__init__.py (91%)
 rename physicsnemo/experimental/models/{typhon => geotransolver}/context_projector.py (98%)
 rename physicsnemo/experimental/models/{typhon => geotransolver}/gale.py (97%)
 rename physicsnemo/experimental/models/{typhon/typhon.py => geotransolver/geotransolver.py} (92%)
 rename test/models/{typhon => geotransolver}/__init__.py (100%)
 rename test/models/{data/typhon_basic_output.pth => geotransolver/data/geotransolver_basic_output.pth} (100%)
 rename test/models/{data/typhon_te_output.pth => geotransolver/data/geotransolver_te_output.pth} (100%)
 rename test/models/{data/typhon_tuple_output.pth => geotransolver/data/geotransolver_tuple_output.pth} (100%)
 rename test/models/{typhon => geotransolver}/test_context_projector.py (96%)
 rename test/models/{typhon => geotransolver}/test_gale.py (99%)
 rename test/models/{typhon/test_typhon.py => geotransolver/test_geotransolver.py} (85%)

diff --git a/physicsnemo/experimental/models/typhon/__init__.py b/physicsnemo/experimental/models/geotransolver/__init__.py
similarity index 91%
rename from physicsnemo/experimental/models/typhon/__init__.py
rename to physicsnemo/experimental/models/geotransolver/__init__.py
index 430d0e93d2..febfc7b90f 100644
--- a/physicsnemo/experimental/models/typhon/__init__.py
+++ b/physicsnemo/experimental/models/geotransolver/__init__.py
@@ -14,6 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .typhon import Typhon
+from .geotransolver import GeoTransolver
 
-__all__ = ["Typhon"]
+__all__ = ["GeoTransolver"]
diff --git a/physicsnemo/experimental/models/typhon/context_projector.py b/physicsnemo/experimental/models/geotransolver/context_projector.py
similarity index 98%
rename from physicsnemo/experimental/models/typhon/context_projector.py
rename to physicsnemo/experimental/models/geotransolver/context_projector.py
index 9abdfaf19d..3a1b68da77 100644
--- a/physicsnemo/experimental/models/typhon/context_projector.py
+++ b/physicsnemo/experimental/models/geotransolver/context_projector.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Context Projector for Typhon model.
+"""Context Projector for GeoTransolver model.
 
 This module provides the ContextProjector class, which projects context features
 (geometry or global embeddings) onto learned physical state spaces for use in
@@ -25,11 +25,13 @@
 import torch.nn as nn
 from einops import rearrange
 
-from physicsnemo.utils.version_check import check_min_version
+from physicsnemo.core.version_check import check_version_spec
 from physicsnemo.models.transolver.Physics_Attention import gumbel_softmax
+from physicsnemo.nn.ball_query import BQWarp
+from physicsnemo.nn.mlp_layers import Mlp
 
 # Check optional dependency availability
-TE_AVAILABLE = check_min_version("transformer_engine", "0.1.0", hard_fail=False)
+TE_AVAILABLE = check_version_spec("transformer_engine", "0.1.0", hard_fail=False)
 if TE_AVAILABLE:
     import transformer_engine.pytorch as te
 
@@ -41,7 +43,7 @@ class ContextProjector(nn.Module):
     It projects context values (geometry or global embeddings) onto a learned physical
     state space, but unlike a full attention layer, it never projects back to the
     original space. The projected features are used as context in all GALE blocks
-    of the Typhon model.
+    of the GeoTransolver model.
 
     Parameters
     ----------
@@ -68,7 +70,7 @@ class ContextProjector(nn.Module):
     See Also
     --------
     :class:`GALE` : Full GALE attention layer that uses these projected context features.
-    :class:`Typhon` : Main model that uses ContextProjector for geometry and global embeddings.
+    :class:`GeoTransolver` : Main model that uses ContextProjector for geometry and global embeddings.
     """
 
     def __init__(
@@ -281,7 +283,6 @@ def __init__(
         hidden_dim: int,
     ):
         super().__init__()
-        from physicsnemo.models.layers import BQWarp, Mlp
 
         self.bq_warp = BQWarp(radius=radius, neighbors_in_radius=neighbors_in_radius)
         self.mlp = Mlp(
diff --git a/physicsnemo/experimental/models/typhon/gale.py b/physicsnemo/experimental/models/geotransolver/gale.py
similarity index 97%
rename from physicsnemo/experimental/models/typhon/gale.py
rename to physicsnemo/experimental/models/geotransolver/gale.py
index 2e2f0087c8..82c78c9991 100644
--- a/physicsnemo/experimental/models/typhon/gale.py
+++ b/physicsnemo/experimental/models/geotransolver/gale.py
@@ -23,19 +23,18 @@
 import torch.nn.functional as F
 
 import physicsnemo  # noqa: F401 for docs
-from physicsnemo.utils.version_check import check_min_version
+from physicsnemo.core.version_check import check_version_spec
 from physicsnemo.models.transolver.Physics_Attention import (
     PhysicsAttentionIrregularMesh,
     gumbel_softmax,
 )
 from physicsnemo.models.transolver.transolver import MLP
-from physicsnemo.models.layers import BQWarp, fourier_encode, Mlp
 
-from physicsnemo.models.meta import ModelMetaData
-from physicsnemo.models.module import Module
+from physicsnemo.core.meta import ModelMetaData
+from physicsnemo.core.module import Module
 
 # Check optional dependency availability
-TE_AVAILABLE = check_min_version("transformer_engine", "0.1.0", hard_fail=False)
+TE_AVAILABLE = check_version_spec("transformer_engine", "0.1.0", hard_fail=False)
 if TE_AVAILABLE:
     import transformer_engine.pytorch as te
 
diff --git a/physicsnemo/experimental/models/typhon/typhon.py b/physicsnemo/experimental/models/geotransolver/geotransolver.py
similarity index 92%
rename from physicsnemo/experimental/models/typhon/typhon.py
rename to physicsnemo/experimental/models/geotransolver/geotransolver.py
index 84d61467cd..8330106984 100644
--- a/physicsnemo/experimental/models/typhon/typhon.py
+++ b/physicsnemo/experimental/models/geotransolver/geotransolver.py
@@ -21,29 +21,29 @@
 import torch.nn as nn
 
 import physicsnemo  # noqa: F401 for docs
-from physicsnemo.utils.version_check import check_min_version
+from physicsnemo.core.version_check import check_version_spec
 from physicsnemo.models.transolver.transolver import MLP
 
-from physicsnemo.models.meta import ModelMetaData
-from physicsnemo.models.module import Module
+from physicsnemo.core.meta import ModelMetaData
+from physicsnemo.core.module import Module
 
 from .context_projector import ContextProjector, GlobalContextBuilder
 from .gale import GALE_block
 
 # Check optional dependency availability
-TE_AVAILABLE = check_min_version("transformer_engine", "0.1.0", hard_fail=False)
+TE_AVAILABLE = check_version_spec("transformer_engine", "0.1.0", hard_fail=False)
 if TE_AVAILABLE:
     import transformer_engine.pytorch as te
 
 
 
 @dataclass
-class TyphonMetaData(ModelMetaData):
+class GeoTransolverMetaData(ModelMetaData):
     """
-    Data class for storing essential meta data needed for the Typhon model.
+    Data class for storing essential meta data needed for the GeoTransolver model.
     """
 
-    name: str = "Typhon"
+    name: str = "GeoTransolver"
     # Optimization
     jit: bool = False
     cuda_graphs: bool = False
@@ -76,10 +76,10 @@ def _normalize_tensor(x):
         return x
     raise TypeError(f"Invalid tensor structure")
 
-class Typhon(Module):
-    r"""Typhon: Geometry-Aware Physics Attention Transformer.
+class GeoTransolver(Module):
+    r"""GeoTransolver: Geometry-Aware Physics Attention Transformer.
 
-    Typhon is an adaptation of the Transolver architecture, replacing standard attention
+    GeoTransolver is an adaptation of the Transolver architecture, replacing standard attention
     with GALE (Geometry-Aware Latent Embeddings) attention. GALE combines physics-aware
     self-attention on learned state slices with cross-attention to geometry and global
     context embeddings.
@@ -151,7 +151,7 @@ class Typhon(Module):
 
     Notes
     -----
-    Typhon currently supports unstructured mesh input only. Enhancements for image-based
+    GeoTransolver currently supports unstructured mesh input only. Enhancements for image-based
     and voxel-based inputs may be available in the future.
 
     For more details on Transolver, see:
@@ -160,7 +160,7 @@ class Typhon(Module):
 
     See Also
     --------
-    :class:`GALE` : The attention mechanism used in Typhon.
+    :class:`GALE` : The attention mechanism used in GeoTransolver.
     :class:`GALE_block` : Transformer block using GALE attention.
     :class:`ContextProjector` : Projects context features onto physical states.
 
@@ -170,7 +170,7 @@ class Typhon(Module):
 
     >>> import torch
     >>> import physicsnemo
-    >>> model = physicsnemo.models.Typhon(
+    >>> model = physicsnemo.models.GeoTransolver(
     ...     functional_dim=64,
     ...     out_dim=3,
     ...     n_hidden=256,
@@ -183,7 +183,7 @@ class Typhon(Module):
 
     Usage with geometry and global context:
 
-    >>> model = physicsnemo.models.Typhon(
+    >>> model = physicsnemo.models.GeoTransolver(
     ...     functional_dim=64,
     ...     out_dim=3,
     ...     geometry_dim=3,
@@ -220,8 +220,8 @@ def __init__(
         neighbors_in_radius: list[int] = [8, 32],
         n_hidden_local: int = 32,
     ) -> None:
-        super().__init__(meta=TyphonMetaData())
-        self.__name__ = "Typhon"
+        super().__init__(meta=GeoTransolverMetaData())
+        self.__name__ = "GeoTransolver"
 
         self.include_local_features = include_local_features
 
@@ -229,7 +229,7 @@ def __init__(
         # Check that the hidden dimension and head dimensions are compatible:
         if not n_hidden % n_head == 0:
             raise ValueError(
-                f"Typhon requires n_hidden % n_head == 0, but instead got {n_hidden % n_head}"
+                f"GeoTransolver requires n_hidden % n_head == 0, but instead got {n_hidden % n_head}"
             )
 
         functional_dims = _normalize_dim(functional_dim)
@@ -334,7 +334,7 @@ def forward(
         geometry: torch.Tensor | None = None,
         time: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        r"""Forward pass of the Typhon model.
+        r"""Forward pass of the GeoTransolver model.
 
         The model constructs global context embeddings from geometry and global features by
         projecting them onto physical state spaces. These context embeddings are then used
diff --git a/test/models/typhon/__init__.py b/test/models/geotransolver/__init__.py
similarity index 100%
rename from test/models/typhon/__init__.py
rename to test/models/geotransolver/__init__.py
diff --git a/test/models/data/typhon_basic_output.pth b/test/models/geotransolver/data/geotransolver_basic_output.pth
similarity index 100%
rename from test/models/data/typhon_basic_output.pth
rename to test/models/geotransolver/data/geotransolver_basic_output.pth
diff --git a/test/models/data/typhon_te_output.pth b/test/models/geotransolver/data/geotransolver_te_output.pth
similarity index 100%
rename from test/models/data/typhon_te_output.pth
rename to test/models/geotransolver/data/geotransolver_te_output.pth
diff --git a/test/models/data/typhon_tuple_output.pth b/test/models/geotransolver/data/geotransolver_tuple_output.pth
similarity index 100%
rename from test/models/data/typhon_tuple_output.pth
rename to test/models/geotransolver/data/geotransolver_tuple_output.pth
diff --git a/test/models/typhon/test_context_projector.py b/test/models/geotransolver/test_context_projector.py
similarity index 96%
rename from test/models/typhon/test_context_projector.py
rename to test/models/geotransolver/test_context_projector.py
index 83029ee7bb..1274c10485 100644
--- a/test/models/typhon/test_context_projector.py
+++ b/test/models/geotransolver/test_context_projector.py
@@ -17,7 +17,7 @@
 import pytest
 import torch
 
-from physicsnemo.experimental.models.typhon.typhon import (
+from physicsnemo.experimental.models.geotransolver.context_projector import (
     ContextProjector,
 )
 
diff --git a/test/models/typhon/test_gale.py b/test/models/geotransolver/test_gale.py
similarity index 99%
rename from test/models/typhon/test_gale.py
rename to test/models/geotransolver/test_gale.py
index 1c88dc0ecd..40ac14ffeb 100644
--- a/test/models/typhon/test_gale.py
+++ b/test/models/geotransolver/test_gale.py
@@ -17,7 +17,7 @@
 import pytest
 import torch
 
-from physicsnemo.experimental.models.typhon.gale import (
+from physicsnemo.experimental.models.geotransolver.gale import (
     GALE,
     GALE_block,
 )
diff --git a/test/models/typhon/test_typhon.py b/test/models/geotransolver/test_geotransolver.py
similarity index 85%
rename from test/models/typhon/test_typhon.py
rename to test/models/geotransolver/test_geotransolver.py
index 3a4b09e5b7..c5eab351e8 100644
--- a/test/models/typhon/test_typhon.py
+++ b/test/models/geotransolver/test_geotransolver.py
@@ -14,23 +14,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os
 import random
-import sys
 
 import pytest
 import torch
-from pytest_utils import import_or_fail
 
-from physicsnemo.experimental.models.typhon.typhon import (
-    Typhon,
+from physicsnemo.experimental.models.geotransolver.geotransolver import (
+    GeoTransolver,
 )
-
-# Add parent directory to path for imports
-script_path = os.path.abspath(__file__)
-sys.path.append(os.path.join(os.path.dirname(script_path), ".."))
-
-from common import (  # noqa E402
+from test.common import (  # noqa E402
     validate_amp,
     validate_checkpoint,
     validate_combo_optims,
@@ -38,18 +30,18 @@
     validate_forward_accuracy,
     validate_jit,
 )
-
+from test.conftest import requires_module
 
 # =============================================================================
-# Typhon End-to-End Model Tests
+# GeoTransolver End-to-End Model Tests
 # =============================================================================
 
 
 @pytest.mark.parametrize("device", ["cuda:0", "cpu"])
 @pytest.mark.parametrize("use_geometry", [False, True])
 @pytest.mark.parametrize("use_global", [False, True])
-def test_typhon_forward(device, use_geometry, use_global):
-    """Test Typhon model forward pass with optional geometry and global context."""
+def test_geotransolver_forward(device, use_geometry, use_global):
+    """Test GeoTransolver model forward pass with optional geometry and global context."""
     torch.manual_seed(42)
 
     batch_size = 2
@@ -59,7 +51,7 @@ def test_typhon_forward(device, use_geometry, use_global):
     geometry_dim = 3
     global_dim = 16
 
-    model = Typhon(
+    model = GeoTransolver(
         functional_dim=32,
         out_dim=4,
         geometry_dim=geometry_dim if use_geometry else None,
@@ -97,14 +89,14 @@ def test_typhon_forward(device, use_geometry, use_global):
 
 
 @pytest.mark.parametrize("device", ["cuda:0", "cpu"])
-def test_typhon_forward_tuple_inputs(device):
-    """Test Typhon model forward pass with tuple inputs/outputs (multi-head)."""
+def test_geotransolver_forward_tuple_inputs(device):
+    """Test GeoTransolver model forward pass with tuple inputs/outputs (multi-head)."""
     torch.manual_seed(42)
 
     functional_dims = (32, 48)
     out_dims = (4, 6)
 
-    model = Typhon(
+    model = GeoTransolver(
         functional_dim=functional_dims,
         out_dim=out_dims,
         geometry_dim=3,
@@ -150,13 +142,13 @@ def test_typhon_forward_tuple_inputs(device):
     assert not torch.isnan(outputs[1]).any()
 
 
-@import_or_fail("warp")
+@requires_module("warp")
 @pytest.mark.parametrize("device", ["cuda:0"])
-def test_typhon_forward_with_local_features(device, pytestconfig):
-    """Test Typhon model forward pass with local features (BQ warp)."""
+def test_geotransolver_forward_with_local_features(device, pytestconfig):
+    """Test GeoTransolver model forward pass with local features (BQ warp)."""
     torch.manual_seed(42)
 
-    model = Typhon(
+    model = GeoTransolver(
         functional_dim=32,
         out_dim=4,
         geometry_dim=3,
@@ -206,11 +198,11 @@ def test_typhon_forward_with_local_features(device, pytestconfig):
 
 
 @pytest.mark.parametrize("device", ["cuda:0", "cpu"])
-def test_typhon_forward_accuracy_basic(device):
-    """Test Typhon basic forward pass accuracy."""
+def test_geotransolver_forward_accuracy_basic(device):
+    """Test GeoTransolver basic forward pass accuracy."""
     torch.manual_seed(42)
 
-    model = Typhon(
+    model = GeoTransolver(
         functional_dim=32,
         out_dim=4,
         geometry_dim=3,
@@ -241,20 +233,20 @@ def test_typhon_forward_accuracy_basic(device):
     assert validate_forward_accuracy(
         model,
         (local_emb, local_positions, global_emb, geometry),
-        file_name="typhon_basic_output.pth",
+        file_name="models/geotransolver/data/geotransolver_basic_output.pth",
         atol=1e-3,
     )
 
 
 @pytest.mark.parametrize("device", ["cuda:0", "cpu"])
-def test_typhon_forward_accuracy_tuple(device):
-    """Test Typhon forward pass accuracy with tuple inputs."""
+def test_geotransolver_forward_accuracy_tuple(device):
+    """Test GeoTransolver forward pass accuracy with tuple inputs."""
     torch.manual_seed(42)
 
     functional_dims = (32, 48)
     out_dims = (4, 6)
 
-    model = Typhon(
+    model = GeoTransolver(
         functional_dim=functional_dims,
         out_dim=out_dims,
         geometry_dim=3,
@@ -294,7 +286,7 @@ def test_typhon_forward_accuracy_tuple(device):
             global_emb,
             geometry,
         ),
-        file_name="typhon_tuple_output.pth",
+        file_name="models/geotransolver/data/geotransolver_tuple_output.pth",
         atol=1e-3,
     )
 
@@ -305,12 +297,12 @@ def test_typhon_forward_accuracy_tuple(device):
 
 
 @pytest.mark.parametrize("device", ["cuda:0"])
-def test_typhon_optimizations(device):
-    """Test Typhon optimizations (CUDA graphs, JIT, AMP, combo)."""
+def test_geotransolver_optimizations(device):
+    """Test GeoTransolver optimizations (CUDA graphs, JIT, AMP, combo)."""
 
     def setup_model():
-        """Setup fresh Typhon model and inputs for each optimization test."""
-        model = Typhon(
+        """Setup fresh GeoTransolver model and inputs for each optimization test."""
+        model = GeoTransolver(
             functional_dim=32,
             out_dim=4,
             geometry_dim=3,
@@ -373,13 +365,13 @@ def setup_model():
 # =============================================================================
 
 
-@import_or_fail("transformer_engine")
+@requires_module("transformer_engine")
 @pytest.mark.parametrize("device", ["cuda:0"])
-def test_typhon_te_basic(device, pytestconfig):
-    """Test Typhon with Transformer Engine backend."""
+def test_geotransolver_te_basic(device, pytestconfig):
+    """Test GeoTransolver with Transformer Engine backend."""
     torch.manual_seed(42)
 
-    model = Typhon(
+    model = GeoTransolver(
         functional_dim=32,
         out_dim=4,
         geometry_dim=3,
@@ -425,11 +417,11 @@ def test_typhon_te_basic(device, pytestconfig):
 
 
 @pytest.mark.parametrize("device", ["cuda:0", "cpu"])
-def test_typhon_checkpoint(device):
-    """Test Typhon checkpoint save/load."""
+def test_geotransolver_checkpoint(device):
+    """Test GeoTransolver checkpoint save/load."""
     torch.manual_seed(42)
 
-    model_1 = Typhon(
+    model_1 = GeoTransolver(
         functional_dim=32,
         out_dim=4,
         geometry_dim=3,
@@ -447,7 +439,7 @@ def test_typhon_checkpoint(device):
         include_local_features=False,
     ).to(device)
 
-    model_2 = Typhon(
+    model_2 = GeoTransolver(
         functional_dim=32,
         out_dim=4,
         geometry_dim=3,
@@ -481,14 +473,14 @@ def test_typhon_checkpoint(device):
 
 
 @pytest.mark.parametrize("device", ["cuda:0", "cpu"])
-def test_typhon_checkpoint_tuple(device):
-    """Test Typhon checkpoint save/load with tuple inputs."""
+def test_geotransolver_checkpoint_tuple(device):
+    """Test GeoTransolver checkpoint save/load with tuple inputs."""
     torch.manual_seed(42)
 
     functional_dims = (32, 48)
     out_dims = (4, 6)
 
-    model_1 = Typhon(
+    model_1 = GeoTransolver(
         functional_dim=functional_dims,
         out_dim=out_dims,
         geometry_dim=3,
@@ -506,7 +498,7 @@ def test_typhon_checkpoint_tuple(device):
         include_local_features=False,
     ).to(device)
 
-    model_2 = Typhon(
+    model_2 = GeoTransolver(
         functional_dim=functional_dims,
         out_dim=out_dims,
         geometry_dim=3,
@@ -546,10 +538,10 @@ def test_typhon_checkpoint_tuple(device):
 # =============================================================================
 
 
-def test_typhon_invalid_hidden_head_dims():
-    """Test that Typhon raises error for incompatible hidden/head dimensions."""
+def test_geotransolver_invalid_hidden_head_dims():
+    """Test that GeoTransolver raises error for incompatible hidden/head dimensions."""
     with pytest.raises(ValueError, match="n_hidden % n_head == 0"):
-        Typhon(
+        GeoTransolver(
             functional_dim=32,
             out_dim=4,
             n_hidden=65,  # Not divisible by n_head=4
@@ -558,12 +550,12 @@ def test_typhon_invalid_hidden_head_dims():
         )
 
 
-def test_typhon_mismatched_functional_out_dims():
-    """Test that Typhon raises error for mismatched functional/out dim lengths."""
+def test_geotransolver_mismatched_functional_out_dims():
+    """Test that GeoTransolver raises error for mismatched functional/out dim lengths."""
     with pytest.raises(
         ValueError, match="functional_dim and out_dim must be the same length"
     ):
-        Typhon(
+        GeoTransolver(
             functional_dim=(32, 48),
             out_dim=(4,),  # Length mismatch
             use_te=False,
@@ -577,11 +569,11 @@ def test_typhon_mismatched_functional_out_dims():
 
 @pytest.mark.parametrize("device", ["cuda:0"])
 @pytest.mark.parametrize("activation", ["gelu", "relu", "tanh", "silu"])
-def test_typhon_activations(device, activation):
-    """Test Typhon with different activation functions."""
+def test_geotransolver_activations(device, activation):
+    """Test GeoTransolver with different activation functions."""
     torch.manual_seed(42)
 
-    model = Typhon(
+    model = GeoTransolver(
         functional_dim=32,
         out_dim=4,
         geometry_dim=3,
@@ -624,11 +616,11 @@ def test_typhon_activations(device, activation):
 
 @pytest.mark.parametrize("device", ["cuda:0"])
 @pytest.mark.parametrize("n_layers", [1, 2, 4])
-def test_typhon_different_depths(device, n_layers):
-    """Test Typhon with different numbers of layers."""
+def test_geotransolver_different_depths(device, n_layers):
+    """Test GeoTransolver with different numbers of layers."""
     torch.manual_seed(42)
 
-    model = Typhon(
+    model = GeoTransolver(
         functional_dim=32,
         out_dim=4,
         geometry_dim=3,
@@ -666,11 +658,11 @@ def test_typhon_different_depths(device, n_layers):
 
 @pytest.mark.parametrize("device", ["cuda:0"])
 @pytest.mark.parametrize("slice_num", [4, 16, 32])
-def test_typhon_different_slice_nums(device, slice_num):
-    """Test Typhon with different numbers of physical state slices."""
+def test_geotransolver_different_slice_nums(device, slice_num):
+    """Test GeoTransolver with different numbers of physical state slices."""
     torch.manual_seed(42)
 
-    model = Typhon(
+    model = GeoTransolver(
         functional_dim=32,
         out_dim=4,
         geometry_dim=3,
@@ -708,11 +700,11 @@ def test_typhon_different_slice_nums(device, slice_num):
 
 @pytest.mark.parametrize("device", ["cuda:0"])
 @pytest.mark.parametrize("n_hidden,n_head", [(64, 4), (128, 8), (256, 8)])
-def test_typhon_different_hidden_sizes(device, n_hidden, n_head):
-    """Test Typhon with different hidden dimensions and head counts."""
+def test_geotransolver_different_hidden_sizes(device, n_hidden, n_head):
+    """Test GeoTransolver with different hidden dimensions and head counts."""
     torch.manual_seed(42)
 
-    model = Typhon(
+    model = GeoTransolver(
         functional_dim=32,
         out_dim=4,
         geometry_dim=3,
@@ -753,14 +745,14 @@ def test_typhon_different_hidden_sizes(device, n_hidden, n_head):
 # =============================================================================
 
 
-def test_typhon_metadata():
-    """Test Typhon model metadata."""
-    model = Typhon(
+def test_geotransolver_metadata():
+    """Test GeoTransolver model metadata."""
+    model = GeoTransolver(
         functional_dim=32,
         out_dim=4,
         use_te=False,
     )
 
-    assert model.meta.name == "Typhon"
+    assert model.meta.name == "GeoTransolver"
     assert model.meta.amp is True
-    assert model.__name__ == "Typhon"
+    assert model.__name__ == "GeoTransolver"

From 5bc5375620d436d56ca72a113fdd722f7b83aa5c Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 19 Dec 2025 16:28:36 +0000
Subject: [PATCH 26/32] Rename and add inference script

---
 .../src/conf/data/surface.yaml                |   4 +-
 ...urface.yaml => geotransolver_surface.yaml} |   6 +-
 ..._volume.yaml => geotransolver_volume.yaml} |   4 +-
 .../model/{typhon.yaml => geotransolver.yaml} |   2 +-
 .../src/inference_on_vtk.py                   | 729 ++++++++++++++++++
 .../transformer_models/src/metrics.py         |  14 +-
 .../transformer_models/src/train.py           |  54 +-
 7 files changed, 750 insertions(+), 63 deletions(-)
 rename examples/cfd/external_aerodynamics/transformer_models/src/conf/{typhon_surface.yaml => geotransolver_surface.yaml} (94%)
 rename examples/cfd/external_aerodynamics/transformer_models/src/conf/{typhon_volume.yaml => geotransolver_volume.yaml} (96%)
 rename examples/cfd/external_aerodynamics/transformer_models/src/conf/model/{typhon.yaml => geotransolver.yaml} (94%)
 create mode 100644 examples/cfd/external_aerodynamics/transformer_models/src/inference_on_vtk.py

diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/data/surface.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/data/surface.yaml
index 0f39ba5b10..fed0255591 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/data/surface.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/data/surface.yaml
@@ -28,4 +28,6 @@ data_keys:
   - "surface_areas"
   - "stl_faces"
   - "stl_centers"
-  - "stl_coordinates"
\ No newline at end of file
+  - "stl_coordinates"
+  - "air_density"
+  - "stream_velocity"
\ No newline at end of file
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_surface.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/geotransolver_surface.yaml
similarity index 94%
rename from examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_surface.yaml
rename to examples/cfd/external_aerodynamics/transformer_models/src/conf/geotransolver_surface.yaml
index 5d5955c828..173c5cfc02 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_surface.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/geotransolver_surface.yaml
@@ -16,12 +16,12 @@
 
 defaults:
   - training: base
-  - model: typhon
+  - model: geotransolver
   - data: surface
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
-run_id: "typhon/surface/bq"
+run_id: "geotransolver/surface/bq"
 
 # Performance considerations:
 precision: float32 # float32, float16, bfloat16, or float8
@@ -39,7 +39,7 @@ data:
   include_sdf: false
   include_geometry: true
   geometry_sampling: 300_000
-  broadcast_global_features: false
+  broadcast_global_features: true
 
 
 # Logging configuration
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_volume.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/geotransolver_volume.yaml
similarity index 96%
rename from examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_volume.yaml
rename to examples/cfd/external_aerodynamics/transformer_models/src/conf/geotransolver_volume.yaml
index d2e0189bf8..cd91b485cc 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/typhon_volume.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/geotransolver_volume.yaml
@@ -16,12 +16,12 @@
 
 defaults:
   - training: base
-  - model: typhon
+  - model: geotransolver
   - data: volume
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
-run_id: "typhon/volume/bq"
+run_id: "geotransolver/volume/bq"
 
 # Performance considerations:
 precision: float32 # float32, float16, bfloat16, or float8
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/model/typhon.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/model/geotransolver.yaml
similarity index 94%
rename from examples/cfd/external_aerodynamics/transformer_models/src/conf/model/typhon.yaml
rename to examples/cfd/external_aerodynamics/transformer_models/src/conf/model/geotransolver.yaml
index 6b1e28092c..456c1eea8f 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/model/typhon.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/model/geotransolver.yaml
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-_target_: physicsnemo.experimental.models.typhon.Typhon
+_target_: physicsnemo.experimental.models.geotransolver.GeoTransolver
 functional_dim: 6
 global_dim: 2
 geometry_dim: 3
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/inference_on_vtk.py b/examples/cfd/external_aerodynamics/transformer_models/src/inference_on_vtk.py
new file mode 100644
index 0000000000..87cbf3a9a9
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/inference_on_vtk.py
@@ -0,0 +1,729 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Inference script for running trained Transolver/GeoTransolver models on raw VTK files.
+
+This script reads VTP (surface) and VTU (volume) files directly, processes them through
+the TransolverDataPipe, runs batched inference, and saves predictions back to VTK files.
+
+Usage (surface inference with GeoTransolver):
+    python inference_on_vtk.py --config-name=geotransolver_surface \
+        +vtk_inference.input_dir=/path/to/runs \
+        +vtk_inference.output_dir=/path/to/output \
+        +vtk_inference.air_density=1.2050 \
+        +vtk_inference.stream_velocity=30.0
+
+Usage (volume inference with GeoTransolver):
+    python inference_on_vtk.py --config-name=geotransolver_volume \
+        +vtk_inference.input_dir=/path/to/runs \
+        +vtk_inference.output_dir=/path/to/output
+
+Usage (surface inference with Transolver):
+    python inference_on_vtk.py --config-name=transolver_surface \
+        +vtk_inference.input_dir=/path/to/runs \
+        +vtk_inference.output_dir=/path/to/output
+
+Note: The '+' prefix adds new config keys that don't exist in the base config.
+
+Expected input directory structure:
+    input_dir/
+    ├── run_1/
+    │   ├── boundary_1.vtp              # Surface mesh
+    │   ├── volume_1.vtu                # Volume mesh
+    │   └── drivaer_1_single_solid.stl  # STL geometry
+    ├── run_2/
+    │   └── ...
+    └── ...
+"""
+
+from pathlib import Path
+from typing import Literal
+import time
+
+import numpy as np
+import torch
+import torchinfo
+import pyvista as pv
+
+import hydra
+import omegaconf
+from omegaconf import DictConfig
+
+from physicsnemo.distributed import DistributedManager
+from physicsnemo.utils import load_checkpoint
+from physicsnemo.utils.logging import PythonLogger, RankZeroLoggingWrapper
+
+from physicsnemo.datapipes.cae.transolver_datapipe import TransolverDataPipe
+
+from train import update_model_params_for_fp8
+
+from inference_on_zarr import batched_inference_loop
+
+
+# =============================================================================
+# VTK File Reading Functions
+# =============================================================================
+
+
+def read_stl_geometry(stl_path: str, device: torch.device) -> dict[str, torch.Tensor]:
+    """
+    Read STL file and extract geometry data for SDF calculation.
+
+    Parameters
+    ----------
+    stl_path : str
+        Path to the STL file (e.g., drivaer_N_single_solid.stl).
+    device : torch.device
+        Device to place tensors on.
+
+    Returns
+    -------
+    dict[str, torch.Tensor]
+        Dictionary containing:
+        - stl_coordinates: Vertex coordinates, shape (num_vertices, 3)
+        - stl_faces: Face indices (flattened), shape (num_faces * 3,)
+        - stl_centers: Cell centers, shape (num_cells, 3)
+    """
+    mesh = pv.read(stl_path)
+
+    # Vertex coordinates, cast to float32 and moved to the requested device
+    stl_coordinates = torch.from_numpy(np.asarray(mesh.points)).to(
+        device=device, dtype=torch.float32
+    )
+
+    # pyvista packs faces as [n_verts, v0, v1, v2, n_verts, v0, v1, v2, ...]. reshape(-1, 4)
+    # assumes every face is a triangle; indices are then flattened and stored as int32
+    faces = mesh.faces.reshape(-1, 4)[:, 1:]  # drop the per-face count, keep v0, v1, v2
+    stl_faces = torch.from_numpy(faces.flatten()).to(device=device, dtype=torch.int32)
+
+    # One center point per cell, taken from pyvista's cell_centers()
+    stl_centers = torch.from_numpy(np.asarray(mesh.cell_centers().points)).to(
+        device=device, dtype=torch.float32
+    )
+
+    return {
+        "stl_coordinates": stl_coordinates,
+        "stl_faces": stl_faces,
+        "stl_centers": stl_centers,
+    }
+
+
+def read_surface_from_vtp(
+    vtp_path: str, device: torch.device, n_output_fields: int = 4
+) -> dict[str, torch.Tensor]:
+    """
+    Read VTP (PolyData) file and extract surface mesh data.
+
+    Parameters
+    ----------
+    vtp_path : str
+        Path to the VTP file (e.g., boundary_N.vtp).
+    device : torch.device
+        Device to place tensors on.
+    n_output_fields : int
+        Number of output fields (default 4: pressure + 3 wall shear stress components).
+
+    Returns
+    -------
+    dict[str, torch.Tensor]
+        Dictionary containing:
+        - surface_mesh_centers: Cell center coordinates, shape (num_cells, 3)
+        - surface_normals: Cell normals, shape (num_cells, 3)
+        - surface_areas: Cell areas, shape (num_cells,)
+        - surface_fields: Dummy zeros for inference, shape (num_cells, n_output_fields)
+    """
+    mesh = pv.read(vtp_path)
+
+    # One center point per surface cell, as float32 on the target device
+    surface_mesh_centers = torch.from_numpy(np.asarray(mesh.cell_centers().points)).to(
+        device=device, dtype=torch.float32
+    )
+
+    # Rescale cell normals to unit length; the 1e-8 term avoids division by zero
+    normals = np.asarray(mesh.cell_normals)
+    normals = normals / (np.linalg.norm(normals, axis=1, keepdims=True) + 1e-8)
+    surface_normals = torch.from_numpy(normals).to(device=device, dtype=torch.float32)
+
+    # Request only areas from compute_cell_sizes; they are read back from cell_data["Area"]
+    cell_sizes = mesh.compute_cell_sizes(length=False, area=True, volume=False)
+    surface_areas = torch.from_numpy(np.asarray(cell_sizes.cell_data["Area"])).to(
+        device=device, dtype=torch.float32
+    )
+
+    # No field values are read from the file: build an all-zero placeholder per cell
+    num_cells = surface_mesh_centers.shape[0]
+    surface_fields = torch.zeros(
+        (num_cells, n_output_fields), device=device, dtype=torch.float32
+    )
+
+    return {
+        "surface_mesh_centers": surface_mesh_centers,
+        "surface_normals": surface_normals,
+        "surface_areas": surface_areas,
+        "surface_fields": surface_fields,
+    }
+
+
+def read_volume_from_vtu(
+    vtu_path: str, device: torch.device, n_output_fields: int = 5
+) -> dict[str, torch.Tensor]:
+    """
+    Read VTU (UnstructuredGrid) file and extract volume mesh data.
+
+    Parameters
+    ----------
+    vtu_path : str
+        Path to the VTU file (e.g., volume_N.vtu).
+    device : torch.device
+        Device to place tensors on.
+    n_output_fields : int
+        Number of output fields (default 5: 3 velocity + pressure + turbulent viscosity).
+
+    Returns
+    -------
+    dict[str, torch.Tensor]
+        Dictionary containing:
+        - volume_mesh_centers: Cell center coordinates, shape (num_cells, 3)
+        - volume_fields: Dummy zeros for inference, shape (num_cells, n_output_fields)
+    """
+    mesh = pv.read(vtu_path)
+
+    # One center point per volume cell, as float32 on the target device
+    volume_mesh_centers = torch.from_numpy(np.asarray(mesh.cell_centers().points)).to(
+        device=device, dtype=torch.float32
+    )
+
+    # No field values are read from the file: build an all-zero placeholder per cell
+    num_cells = volume_mesh_centers.shape[0]
+    volume_fields = torch.zeros(
+        (num_cells, n_output_fields), device=device, dtype=torch.float32
+    )
+
+    return {
+        "volume_mesh_centers": volume_mesh_centers,
+        "volume_fields": volume_fields,
+    }
+
+
+# =============================================================================
+# Data Dict Builder
+# =============================================================================
+
+
+def build_data_dict(
+    run_dir: Path,
+    data_mode: Literal["surface", "volume", "combined"],
+    device: torch.device,
+    air_density: float,
+    stream_velocity: float,
+    run_idx: int,
+) -> dict[str, torch.Tensor]:
+    """
+    Assemble the datapipe input dictionary for one run from its STL/VTK files.
+
+    The STL geometry is always loaded; the surface (VTP) and volume (VTU) meshes
+    are loaded according to ``data_mode``. Each file is looked up under its
+    index-based name first and, failing that, as the first match of a glob
+    pattern. The result is compatible with TransolverDataPipe.process_data().
+
+    Parameters
+    ----------
+    run_dir : Path
+        Directory holding the files of a single run.
+    data_mode : Literal["surface", "volume", "combined"]
+        ``"surface"`` loads the VTP file, ``"volume"`` the VTU file and
+        ``"combined"`` both.
+    device : torch.device
+        Device on which every tensor is created.
+    air_density : float
+        Air density, stored under the ``"air_density"`` key.
+    stream_velocity : float
+        Stream velocity, stored under the ``"stream_velocity"`` key.
+    run_idx : int
+        Run index used to build the index-based file names
+        (``drivaer_{run_idx}_single_solid.stl``, ``boundary_{run_idx}.vtp``,
+        ``volume_{run_idx}.vtu``).
+
+    Returns
+    -------
+    dict[str, torch.Tensor]
+        Geometry, mesh and flow-parameter tensors for the datapipe.
+
+    Raises
+    ------
+    FileNotFoundError
+        If the STL file, or a VTP/VTU file required by ``data_mode``, is missing.
+    """
+    # STL geometry is loaded in every mode: it feeds the SDF (volume mode) and
+    # the center-of-mass calculation.
+    stl_path = run_dir / f"drivaer_{run_idx}_single_solid.stl"
+    if not stl_path.exists():
+        # Index-based name missing: accept any file matching the STL pattern
+        stl_candidates = list(run_dir.glob("*_single_solid.stl"))
+        if not stl_candidates:
+            raise FileNotFoundError(f"No STL file found in {run_dir}")
+        stl_path = stl_candidates[0]
+
+    data_dict = {}
+    data_dict.update(read_stl_geometry(str(stl_path), device))
+
+    # Surface mesh (cell centers, normals, areas) for surface/combined modes
+    if data_mode in ("surface", "combined"):
+        vtp_path = run_dir / f"boundary_{run_idx}.vtp"
+        if not vtp_path.exists():
+            # Index-based name missing: accept any boundary_*.vtp file
+            vtp_candidates = list(run_dir.glob("boundary_*.vtp"))
+            if not vtp_candidates:
+                raise FileNotFoundError(f"No VTP file found in {run_dir}")
+            vtp_path = vtp_candidates[0]
+
+        data_dict.update(read_surface_from_vtp(str(vtp_path), device))
+
+    # Volume mesh (cell centers) for volume/combined modes
+    if data_mode in ("volume", "combined"):
+        vtu_path = run_dir / f"volume_{run_idx}.vtu"
+        if not vtu_path.exists():
+            # Index-based name missing: accept any volume_*.vtu file
+            vtu_candidates = list(run_dir.glob("volume_*.vtu"))
+            if not vtu_candidates:
+                raise FileNotFoundError(f"No VTU file found in {run_dir}")
+            vtu_path = vtu_candidates[0]
+
+        data_dict.update(read_volume_from_vtu(str(vtu_path), device))
+
+    # Flow conditions are stored as one-element float32 tensors on the device
+    scalar_kwargs = {"device": device, "dtype": torch.float32}
+    data_dict["air_density"] = torch.tensor([air_density], **scalar_kwargs)
+    data_dict["stream_velocity"] = torch.tensor([stream_velocity], **scalar_kwargs)
+
+    return data_dict
+
+
+# =============================================================================
+# Prediction Writer
+# =============================================================================
+
+
+def write_surface_predictions_to_vtk(
+    vtp_path: str,
+    output_path: str,
+    predictions: torch.Tensor,
+    air_density: float,
+    stream_velocity: float,
+) -> None:
+    """
+    Attach scaled surface predictions to the input mesh and save it as VTP.
+
+    Parameters
+    ----------
+    vtp_path : str
+        Path to the original VTP file, which provides the mesh structure.
+    output_path : str
+        Path to write the output VTP file.
+    predictions : torch.Tensor
+        Model predictions, shape (num_cells, 4) - [pressure, wss_x, wss_y, wss_z].
+    air_density : float
+        Air density; predictions are multiplied by air_density * stream_velocity**2.
+    stream_velocity : float
+        Stream velocity used in the same scaling factor.
+    """
+    # The mesh is freshly read and private to this call; no defensive copy needed.
+    output_mesh = pv.read(vtp_path)
+
+    # Upcast before leaving torch: numpy has no bfloat16 dtype.
+    pred_np = predictions.detach().float().cpu().numpy()
+
+    # Split into pressure and wall shear stress
+    pred_pressure = pred_np[:, 0]  # Shape: (num_cells,)
+    pred_wss = pred_np[:, 1:4]  # Shape: (num_cells, 3)
+
+    # Scale to physical units
+    dynamic_pressure = air_density * stream_velocity**2
+    pred_pressure = pred_pressure * dynamic_pressure
+    pred_wss = pred_wss * dynamic_pressure
+
+    # Add to mesh
+    output_mesh.cell_data["PredictedPressure"] = pred_pressure
+    output_mesh.cell_data["PredictedWallShearStress"] = pred_wss
+
+    # Save
+    output_mesh.save(output_path)
+
+
+def write_volume_predictions_to_vtk(
+    vtu_path: str,
+    output_path: str,
+    predictions: torch.Tensor,
+    air_density: float,
+    stream_velocity: float,
+) -> None:
+    """
+    Attach scaled volume predictions to the input mesh and save it as VTU.
+
+    Parameters
+    ----------
+    vtu_path : str
+        Path to the original VTU file, which provides the mesh structure.
+    output_path : str
+        Path to write the output VTU file.
+    predictions : torch.Tensor
+        Model predictions, shape (num_cells, 5) - [vel_x, vel_y, vel_z, pressure, nut].
+    air_density : float
+        Air density; enters the air_density * stream_velocity**2 scaling factor.
+    stream_velocity : float
+        Stream velocity; scales velocity directly and enters the factor above.
+    """
+    # The mesh is freshly read and private to this call; no defensive copy needed.
+    output_mesh = pv.read(vtu_path)
+
+    # Upcast before leaving torch: numpy has no bfloat16 dtype.
+    pred_np = predictions.detach().float().cpu().numpy()
+
+    # Split into velocity, pressure, and turbulent viscosity
+    pred_velocity = pred_np[:, 0:3]  # Shape: (num_cells, 3)
+    pred_pressure = pred_np[:, 3]  # Shape: (num_cells,)
+    pred_nut = pred_np[:, 4]  # Shape: (num_cells,)
+
+    # Scale to physical units (nut uses the pressure factor, as in inference_on_zarr)
+    dynamic_pressure = air_density * stream_velocity**2
+    pred_velocity = pred_velocity * stream_velocity
+    pred_pressure = pred_pressure * dynamic_pressure
+    pred_nut = pred_nut * dynamic_pressure
+
+    # Add to mesh
+    output_mesh.cell_data["PredictedVelocity"] = pred_velocity
+    output_mesh.cell_data["PredictedPressure"] = pred_pressure
+    output_mesh.cell_data["PredictedNut"] = pred_nut
+
+    # Save
+    output_mesh.save(output_path)
+
+
+# =============================================================================
+# Main Inference Function
+# =============================================================================
+
+
+def create_datapipe(
+    cfg: DictConfig,
+    data_mode: Literal["surface", "volume", "combined"],
+    device: torch.device,
+    surface_factors: dict | None,
+    volume_factors: dict | None,
+) -> TransolverDataPipe:
+    """
+    Build the TransolverDataPipe used to preprocess inputs at inference time.
+
+    Parameters
+    ----------
+    cfg : DictConfig
+        Hydra configuration; optional datapipe settings are read from ``cfg.data``.
+    data_mode : Literal["surface", "volume", "combined"]
+        Passed to the datapipe as its ``model_type``.
+    device : torch.device
+        Device the reference scale is moved to when scale invariance is enabled.
+    surface_factors : dict | None
+        Normalization factors for surface fields, or None.
+    volume_factors : dict | None
+        Normalization factors for volume fields, or None.
+
+    Returns
+    -------
+    TransolverDataPipe
+        Datapipe with no input path and no resolution limit.
+    """
+    # Datapipe options that are forwarded only when the data config sets them
+    forwarded_keys = (
+        "include_normals",
+        "include_sdf",
+        "broadcast_global_features",
+        "include_geometry",
+        "geometry_sampling",
+        "translational_invariance",
+        "reference_origin",
+        "scale_invariance",
+        "reference_scale",
+    )
+
+    overrides = {}
+    for name in forwarded_keys:
+        value = cfg.data.get(name, None)
+        if value is not None:
+            overrides[name] = value
+
+    # resolution=None means no downsampling here; batching is handled by the caller
+    datapipe = TransolverDataPipe(
+        input_path=None,  # The dataset iterator is not used
+        model_type=data_mode,
+        resolution=None,
+        surface_factors=surface_factors,
+        volume_factors=volume_factors,
+        scaling_type="mean_std_scaling",
+        return_mesh_features=True,  # Surface areas/normals, if needed
+        **overrides,
+    )
+
+    # With scale invariance enabled, the reference scale must live on `device`
+    pipe_cfg = datapipe.config
+    if pipe_cfg.scale_invariance and pipe_cfg.reference_scale is not None:
+        pipe_cfg.reference_scale = pipe_cfg.reference_scale.to(device)
+    return datapipe
+
+
+def inference_on_vtk(cfg: DictConfig) -> None:
+    """
+    Run model inference on every VTK run directory and write the predictions.
+
+    Parameters
+    ----------
+    cfg : DictConfig
+        Hydra configuration; must contain a ``vtk_inference`` section.
+    """
+    # Initialize distributed
+    DistributedManager.initialize()
+    dist_manager = DistributedManager()
+
+    logger = RankZeroLoggingWrapper(PythonLogger(name="vtk_inference"), dist_manager)
+
+    # Update config for FP8 if needed
+    cfg, output_pad_size = update_model_params_for_fp8(cfg, logger)
+
+    logger.info(f"Config:\n{omegaconf.OmegaConf.to_yaml(cfg, resolve=True)}")
+
+    # Get VTK inference config - these are added via command line with '+' prefix
+    if not cfg.get("vtk_inference", None):
+        raise ValueError(
+            "vtk_inference config section is required. "
+            "Add it via command line with '+vtk_inference.input_dir=...' etc."
+        )
+
+    vtk_cfg = cfg.vtk_inference
+
+    # Required parameters
+    if not vtk_cfg.get("input_dir", None):
+        raise ValueError("vtk_inference.input_dir is required")
+    if not vtk_cfg.get("output_dir", None):
+        raise ValueError("vtk_inference.output_dir is required")
+
+    input_dir = Path(vtk_cfg.input_dir)
+    output_dir = Path(vtk_cfg.output_dir)
+
+    # Optional parameters with defaults
+    air_density = vtk_cfg.get("air_density", 1.2050)
+    stream_velocity = vtk_cfg.get("stream_velocity", 30.0)
+    run_indices = vtk_cfg.get("run_indices", None)
+
+    logger.info("VTK Inference Settings:")
+    logger.info(f"  input_dir: {input_dir}")
+    logger.info(f"  output_dir: {output_dir}")
+    logger.info(f"  air_density: {air_density}")
+    logger.info(f"  stream_velocity: {stream_velocity}")
+    logger.info(f"  run_indices: {run_indices}")
+
+    # Create output directory
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Determine data mode
+    data_mode = cfg.data.mode
+
+    # Set up model
+    model = hydra.utils.instantiate(cfg.model)
+    logger.info(f"\n{torchinfo.summary(model, verbose=0)}")
+
+    # Load checkpoint (fall back to the training run's checkpoint directory)
+    checkpoint_dir = cfg.checkpoint_dir
+    if checkpoint_dir is None:
+        checkpoint_dir = f"{cfg.output_dir}/{cfg.run_id}/checkpoints"
+
+    loaded_epoch = load_checkpoint(
+        path=checkpoint_dir, models=model, device=dist_manager.device
+    )
+    logger.info(f"Loaded checkpoint from epoch: {loaded_epoch}")
+
+    model.to(dist_manager.device)
+    model.eval()
+
+    if cfg.compile:
+        model = torch.compile(model, dynamic=True)
+
+    num_params = sum(p.numel() for p in model.parameters())
+    logger.info(f"Number of model parameters: {num_params}")
+
+    # Load normalization factors (each .npz archive is closed once it is read)
+    norm_dir = getattr(cfg.data, "normalization_dir", ".")
+
+    surface_factors = None
+    volume_factors = None
+
+    if data_mode in ["surface", "combined"]:
+        norm_file = str(Path(norm_dir) / "surface_fields_normalization.npz")
+        if Path(norm_file).exists():
+            with np.load(norm_file) as norm_data:
+                surface_factors = {
+                    "mean": torch.from_numpy(norm_data["mean"]).to(dist_manager.device),
+                    "std": torch.from_numpy(norm_data["std"]).to(dist_manager.device),
+                }
+            logger.info(f"Loaded surface normalization from {norm_file}")
+        else:
+            logger.warning(f"Surface normalization file not found: {norm_file}")
+
+    if data_mode in ["volume", "combined"]:
+        norm_file = str(Path(norm_dir) / "volume_fields_normalization.npz")
+        if Path(norm_file).exists():
+            with np.load(norm_file) as norm_data:
+                volume_factors = {
+                    "mean": torch.from_numpy(norm_data["mean"]).to(dist_manager.device),
+                    "std": torch.from_numpy(norm_data["std"]).to(dist_manager.device),
+                }
+            logger.info(f"Loaded volume normalization from {norm_file}")
+        else:
+            logger.warning(f"Volume normalization file not found: {norm_file}")
+
+    # Create datapipe
+    datapipe = create_datapipe(
+        cfg, data_mode, dist_manager.device, surface_factors, volume_factors
+    )
+
+    # Get batch resolution from config
+    batch_resolution = cfg.data.resolution
+
+    # Find all run directories
+    if run_indices is not None:
+        run_dirs = [input_dir / f"run_{idx}" for idx in run_indices]
+    else:
+        run_dirs = sorted(
+            [d for d in input_dir.iterdir() if d.is_dir() and d.name.startswith("run_")]
+        )
+
+    logger.info(f"Found {len(run_dirs)} run directories to process")
+
+    # Distribute runs across ranks
+    this_device_runs = run_dirs[dist_manager.rank :: dist_manager.world_size]
+    logger.info(f"Rank {dist_manager.rank} processing {len(this_device_runs)} runs")
+
+    # Process each run; a failure is reported and the loop moves to the next run
+    for run_dir in this_device_runs:
+        run_idx = int(run_dir.name.split("_")[1])
+        logger.info(f"Processing run {run_idx}: {run_dir}")
+
+        start_time = time.time()
+
+        try:
+            # Build data dictionary from VTK files
+            data_dict = build_data_dict(
+                run_dir=run_dir,
+                data_mode=data_mode,
+                device=dist_manager.device,
+                air_density=air_density,
+                stream_velocity=stream_velocity,
+                run_idx=run_idx,
+            )
+
+            # Process through datapipe (adds batch dimension)
+            batch = datapipe(data_dict)
+
+            # Run batched inference using imported function from inference_on_zarr
+            with torch.no_grad():
+                _, _, (predictions, _) = batched_inference_loop(
+                    batch=batch,
+                    model=model,
+                    precision=cfg.precision,
+                    data_mode=data_mode,
+                    batch_resolution=batch_resolution,
+                    output_pad_size=output_pad_size,
+                    dist_manager=dist_manager,
+                    datapipe=datapipe,
+                )
+
+            # Remove batch dimension and get predictions
+            predictions = predictions.squeeze(0)
+
+            # Write predictions to output files
+            # NOTE(review): in "combined" mode both writers get the same tensor - confirm
+            run_output_dir = output_dir / run_dir.name
+            run_output_dir.mkdir(parents=True, exist_ok=True)
+
+            if data_mode in ["surface", "combined"]:
+                vtp_path = run_dir / f"boundary_{run_idx}.vtp"
+                if not vtp_path.exists():
+                    vtp_path = list(run_dir.glob("boundary_*.vtp"))[0]
+
+                output_vtp = run_output_dir / f"pred_boundary_{run_idx}.vtp"
+                write_surface_predictions_to_vtk(
+                    str(vtp_path),
+                    str(output_vtp),
+                    predictions,
+                    air_density,
+                    stream_velocity,
+                )
+                logger.info(f"Saved surface predictions to {output_vtp}")
+
+            if data_mode in ["volume", "combined"]:
+                vtu_path = run_dir / f"volume_{run_idx}.vtu"
+                if not vtu_path.exists():
+                    vtu_path = list(run_dir.glob("volume_*.vtu"))[0]
+
+                output_vtu = run_output_dir / f"pred_volume_{run_idx}.vtu"
+                write_volume_predictions_to_vtk(
+                    str(vtu_path),
+                    str(output_vtu),
+                    predictions,
+                    air_density,
+                    stream_velocity,
+                )
+                logger.info(f"Saved volume predictions to {output_vtu}")
+
+            elapsed = time.time() - start_time
+            logger.info(f"Completed run {run_idx} in {elapsed:.2f} seconds")
+
+        except Exception as e:
+            logger.error(f"Error processing run {run_idx}: {e}")
+            import traceback
+
+            traceback.print_exc()
+
+    logger.info("Inference complete!")
+
+
+# =============================================================================
+# Entry Point
+# =============================================================================
+
+
+@hydra.main(version_base=None, config_path="conf", config_name="geotransolver_surface")
+def launch(cfg: DictConfig) -> None:
+    """
+    Hydra entry point for VTK inference.
+
+    The default config is ``conf/geotransolver_surface``. VTK-specific parameters
+    are not part of that config and must be added on the command line with '+':
+        +vtk_inference.input_dir=/path/to/runs
+        +vtk_inference.output_dir=/path/to/output
+        +vtk_inference.air_density=1.2050  (optional, default: 1.2050)
+        +vtk_inference.stream_velocity=30.0  (optional, default: 30.0)
+        +vtk_inference.run_indices=[1,2,3]  (optional, default: all runs)
+
+    Parameters
+    ----------
+    cfg : DictConfig
+        Configuration composed by Hydra, forwarded unchanged to inference_on_vtk.
+    """
+    inference_on_vtk(cfg)
+
+
+if __name__ == "__main__":
+    launch()  # cfg is supplied by the @hydra.main decorator
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/metrics.py b/examples/cfd/external_aerodynamics/transformer_models/src/metrics.py
index 080526f23a..be86d0cdc9 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/metrics.py
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/metrics.py
@@ -16,7 +16,7 @@
 
 import torch
 import torch.distributed as dist
-from physicsnemo.distributed import ShardTensor
+from physicsnemo.domain_parallel import ShardTensor
 from physicsnemo.distributed import DistributedManager
 
 from utils import tensorwise
@@ -102,12 +102,12 @@ def metrics_fn_volume(
         NotImplementedError: Always, as this function is not yet implemented.
     """
 
-    # 
+    #
     pressure_pred = pred[:, :, 3]
     pressure_target = target[:, :, 3]
 
-    velocity_pred = torch.sqrt(torch.sum(pred[:, :, 0:3]**2.0, dim=2))
-    velocity_target = torch.sqrt(torch.sum(target[:, :, 0:3]**2.0, dim=2))
+    velocity_pred = torch.sqrt(torch.sum(pred[:, :, 0:3] ** 2.0, dim=2))
+    velocity_target = torch.sqrt(torch.sum(target[:, :, 0:3] ** 2.0, dim=2))
 
     # L1 errors
     l1_num = torch.abs(pred - target)
@@ -131,7 +131,7 @@ def metrics_fn_volume(
     mae_num = torch.abs(pred - target)
     mae_num_vel = torch.abs(velocity_pred - velocity_target)
     mae_pressure = torch.abs(pressure_pred - pressure_target)
-    
+
     # L2 errors
     l2_num = (pred - target) ** 2
     l2_num = torch.sum(l2_num, dim=1)
@@ -203,8 +203,8 @@ def metrics_fn_surface(
     pressure_pred = pred[:, :, 0]
     pressure_target = target[:, :, 0]
 
-    wall_shear_pred = torch.sqrt(torch.sum(pred[:, :, 1:4]**2.0, dim=2))
-    wall_shear_target = torch.sqrt(torch.sum(target[:, :, 1:4]**2.0, dim=2))
+    wall_shear_pred = torch.sqrt(torch.sum(pred[:, :, 1:4] ** 2.0, dim=2))
+    wall_shear_target = torch.sqrt(torch.sum(target[:, :, 1:4] ** 2.0, dim=2))
 
     # MAE
     mae_num = torch.abs(pred - target)
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/train.py b/examples/cfd/external_aerodynamics/transformer_models/src/train.py
index b5af14e686..b4ddacfcb1 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/train.py
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/train.py
@@ -56,10 +56,6 @@
 
 # Local folder imports for this example
 from metrics import metrics_fn
-from preprocess import (
-    preprocess_surface_data,
-    downsample_surface,
-)
 
 # tensorwise is to handle single-point-cloud or multi-point-cloud running.
 # it's a decorator that will automatically unzip one or more of a list of tensors,
@@ -310,7 +306,10 @@ def forward_pass(
             local_positions = embeddings[:, :, :3]
             # This is the Typhon path
             outputs = model(
-                global_embedding=features, local_embedding=embeddings, geometry=geometry, local_positions=local_positions
+                global_embedding=features,
+                local_embedding=embeddings,
+                geometry=geometry,
+                local_positions=local_positions,
             )
 
             outputs = unpad_output_for_fp8(outputs, output_pad_size)
@@ -357,47 +356,6 @@ def forward_pass(
     )
     all_metrics.update(metrics)
 
-    # if "geometry" in batch.keys():
-    #     print(f"HERE")
-    #     unscaled_outputs = []
-    #     unscaled_targets = []
-    #     for i in range(len(outputs)):
-    #         local_unscaled_outputs = datapipe.unscale_model_targets(
-    #             outputs[i],
-    #             air_density=air_density,
-    #             stream_velocity=stream_velocity,
-    #             factor_type=modes[i],
-    #         )
-    #         local_unscaled_targets = datapipe.unscale_model_targets(
-    #             targets[i],
-    #             air_density=air_density,
-    #             stream_velocity=stream_velocity,
-    #             factor_type=modes[i],
-    #         )
-    #         print(f"local_unscaled_outputs: {local_unscaled_outputs.shape}")
-    #         print(f"local_unscaled_targets: {local_unscaled_targets.shape}")
-    #         metrics = metrics_fn(local_unscaled_outputs, local_unscaled_targets, dist_manager, modes[i])
-    #         print(f"metrics: {metrics}")
-    #         all_metrics.update(metrics)
-    #         unscaled_outputs.append(local_unscaled_outputs)
-    #         unscaled_targets.append(local_unscaled_targets)
-    # else:
-    #     unscaled_outputs = datapipe.unscale_model_targets(
-    #         outputs,
-    #         air_density=air_density,
-    #         stream_velocity=stream_velocity,
-    #         factor_type=modes[0],
-    #     )
-    #     unscaled_targets = datapipe.unscale_model_targets(
-    #         targets,
-    #         air_density=air_density,
-    #         stream_velocity=stream_velocity,
-    #         factor_type=modes[0],
-    #     )
-
-    #     metrics = metrics_fn(unscaled_outputs, unscaled_targets, dist_manager, modes[0])
-    #     all_metrics.update(metrics)
-
     return full_loss, all_metrics, (unscaled_outputs, unscaled_targets)
 
 
@@ -476,9 +434,7 @@ def train_epoch(
         if i == 0:
             total_metrics = metrics
         else:
-            total_metrics = {
-                k: total_metrics[k] + metrics[k] for k in metrics.keys()
-            }
+            total_metrics = {k: total_metrics[k] + metrics[k] for k in metrics.keys()}
 
         duration = end_time - start_time
         start_time = end_time

From d2d617209895ae9f7c276955b86cd1a60c04c4e4 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Tue, 23 Dec 2025 11:24:05 -0600
Subject: [PATCH 27/32] Fix precommit

---
 .../src/inference_on_zarr.py                  | 45 +++++++++++++------
 1 file changed, 31 insertions(+), 14 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/inference_on_zarr.py b/examples/cfd/external_aerodynamics/transformer_models/src/inference_on_zarr.py
index 96bd1c388c..b8195ff327 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/inference_on_zarr.py
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/inference_on_zarr.py
@@ -324,16 +324,22 @@ def inference(cfg: DictConfig) -> None:
         start = time.time()
 
         air_density = batch["air_density"] if "air_density" in batch.keys() else None
-        stream_velocity = batch["stream_velocity"] if "stream_velocity" in batch.keys() else None
+        stream_velocity = (
+            batch["stream_velocity"] if "stream_velocity" in batch.keys() else None
+        )
 
         if cfg.data.mode == "surface":
             coeff = 1.0
 
             if stream_velocity is not None:
-                global_predictions = global_predictions * stream_velocity**2.0 * air_density
+                global_predictions = (
+                    global_predictions * stream_velocity**2.0 * air_density
+                )
                 global_targets = global_targets * stream_velocity**2.0 * air_density
 
-            metrics = metrics_fn_surface(global_predictions, global_targets, dist_manager)
+            metrics = metrics_fn_surface(
+                global_predictions, global_targets, dist_manager
+            )
             # Compute the drag and loss coefficients:
             # (Index on [0] is to remove the 1 batch index)
             pred_pressure, pred_shear = torch.split(
@@ -359,7 +365,6 @@ def inference(cfg: DictConfig) -> None:
                 torch.tensor([[0, 0, 1]], device=dist_manager.device),
             )
 
-            
             # true_fields = val_dataset.unscale_model_targets(batch["fields"], air_density=air_density, stream_velocity=stream_velocity)
             true_pressure, true_shear = torch.split(global_targets[0], (1, 3), dim=-1)
 
@@ -437,14 +442,26 @@ def inference(cfg: DictConfig) -> None:
 
         elif cfg.data.mode == "volume":
             if stream_velocity is not None:
-                global_predictions[:, :, 3] = global_predictions[:, :, 3] * stream_velocity**2.0 * air_density
-                global_targets[:, :, 3] = global_targets[:, :, 3] * stream_velocity**2.0 * air_density
-                global_predictions[:, :, 0:3] = global_predictions[:, :, 0:3] * stream_velocity
+                global_predictions[:, :, 3] = (
+                    global_predictions[:, :, 3] * stream_velocity**2.0 * air_density
+                )
+                global_targets[:, :, 3] = (
+                    global_targets[:, :, 3] * stream_velocity**2.0 * air_density
+                )
+                global_predictions[:, :, 0:3] = (
+                    global_predictions[:, :, 0:3] * stream_velocity
+                )
                 global_targets[:, :, 0:3] = global_targets[:, :, 0:3] * stream_velocity
-                global_predictions[:, :, 4] = global_predictions[:, :, 4] * stream_velocity**2.0 * air_density
-                global_targets[:, :, 4] = global_targets[:, :, 4] * stream_velocity**2.0 * air_density
-            
-            metrics = metrics_fn_volume(global_predictions, global_targets, dist_manager)
+                global_predictions[:, :, 4] = (
+                    global_predictions[:, :, 4] * stream_velocity**2.0 * air_density
+                )
+                global_targets[:, :, 4] = (
+                    global_targets[:, :, 4] * stream_velocity**2.0 * air_density
+                )
+
+            metrics = metrics_fn_volume(
+                global_predictions, global_targets, dist_manager
+            )
             # Extract metric values and convert tensors to floats
             l2_pressure = (
                 metrics["l2_pressure_vol"].item()
@@ -476,7 +493,7 @@ def inference(cfg: DictConfig) -> None:
                 if hasattr(metrics["mae_velocity"], "item")
                 else metrics["mae_velocity"]
             )
-            
+
             l2_nut = (
                 metrics["l2_nut"].item()
                 if hasattr(metrics["l2_nut"], "item")
@@ -541,7 +558,7 @@ def inference(cfg: DictConfig) -> None:
         logger.info(f"R2 score for lift: {r2_lift:.4f}")
         logger.info(f"R2 score for drag: {r2_drag:.4f}")
         csv_filename = f"{cfg.output_dir}/{cfg.run_id}/surface_inference_results_{datetime.now()}.csv"
-        with open(csv_filename, 'w', newline='') as f:
+        with open(csv_filename, "w", newline="") as f:
             writer = csv.writer(f)
             writer.writerow(headers)
             writer.writerows(results)
@@ -566,7 +583,7 @@ def inference(cfg: DictConfig) -> None:
             f"Results:\n{tabulate(results, headers=headers, tablefmt='github')}"
         )
         csv_filename = f"{cfg.output_dir}/{cfg.run_id}/volume_inference_results_{datetime.now()}.csv"
-        with open(csv_filename, 'w', newline='') as f:
+        with open(csv_filename, "w", newline="") as f:
             writer = csv.writer(f)
             writer.writerow(headers)
             writer.writerows(results)

From 75997a2d5937fa465e82ed1dba046d21ad967d60 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 2 Jan 2026 19:40:32 +0000
Subject: [PATCH 28/32] Update geotransolver naming

---
 .../transformer_models/README.md              | 12 +++----
 .../src/compute_normalizations.py             |  1 -
 .../src/conf/data/volume.yaml                 |  5 +--
 .../src/conf/transolver_surface.yaml          |  5 +--
 .../src/conf/transolver_volume.yaml           |  2 +-
 .../geotransolver/test_context_projector.py   | 32 -------------------
 test/models/geotransolver/test_gale.py        |  5 ---
 .../geotransolver/test_geotransolver.py       | 16 +++-------
 8 files changed, 18 insertions(+), 60 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/transformer_models/README.md b/examples/cfd/external_aerodynamics/transformer_models/README.md
index d6750ac47f..463285822e 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/README.md
+++ b/examples/cfd/external_aerodynamics/transformer_models/README.md
@@ -1,7 +1,7 @@
 <!-- markdownlint-disable -->
 # Transformer Models for External Aerodynamics on Irregular Meshes
 
-This directory contains training and inference recipes for transformer-based surrogate models for CFD applications. This is a collection of transformer models including `Transolver` and `Typhon`, both of which can be run on surface or volume data.
+This directory contains training and inference recipes for transformer-based surrogate models for CFD applications. This is a collection of transformer models including `Transolver` and `GeoTransolver`, both of which can be run on surface or volume data.
 
 ## Models Overview
 
@@ -11,11 +11,11 @@ This directory contains training and inference recipes for transformer-based sur
 
 By stacking multiple PhysicsAttention layers, the `Transolver` model learns to map from the functional input space to the output space with high fidelity. The PhysicsNeMo implementation closely follows the original Transolver architecture ([https://github.com/thuml/Transolver](https://github.com/thuml/Transolver)), but introduces modifications for improved numerical stability and compatibility with NVIDIA TransformerEngine.
 
-### Typhon
+### GeoTransolver
 
-Typhon adapts the Transolver backbone by replacing standard attention with GALE (Geometry-Aware Latent Embeddings) attention, which unifies physics-aware self-attention on learned state slices with cross-attention to geometry and global context embeddings. Inspired by Domino's multi-scale ball query formulations, Typhon learns global geometry encodings and local latent encodings that capture neighborhoods at multiple radii, preserving fine-grained near-boundary behavior and far-field interactions. Crucially, geometry and global features are projected into physical state spaces and injected as context in every transformer block, ensuring persistent conditioning and alignment between evolving latent states and the underlying domain.
+GeoTransolver adapts the Transolver backbone by replacing standard attention with GALE (Geometry-Aware Latent Embeddings) attention, which unifies physics-aware self-attention on learned state slices with cross-attention to geometry and global context embeddings. Inspired by Domino's multi-scale ball query formulations, GeoTransolver learns global geometry encodings and local latent encodings that capture neighborhoods at multiple radii, preserving fine-grained near-boundary behavior and far-field interactions. Crucially, geometry and global features are projected into physical state spaces and injected as context in every transformer block, ensuring persistent conditioning and alignment between evolving latent states and the underlying domain.
 
-GALE directly targets core challenges in AI physics modeling. By structuring self-attention around physics-aware slices, Typhon encourages interactions that reflect operator couplings (e.g., pressure–velocity or field–material). Multi-scale ball queries enforce locality where needed while maintaining access to global signals, balancing efficiency with nonlocal reasoning. Continuous geometry-context projection at depth mitigates representation drift and improves stability, while providing a natural interface for constraint-aware training and regularization. Together, these design choices enhance accuracy, robustness to geometric and regime shifts, and scalability on large, irregular discretizations.
+GALE directly targets core challenges in AI physics modeling. By structuring self-attention around physics-aware slices, GeoTransolver encourages interactions that reflect operator couplings (e.g., pressure–velocity or field–material). Multi-scale ball queries enforce locality where needed while maintaining access to global signals, balancing efficiency with nonlocal reasoning. Continuous geometry-context projection at depth mitigates representation drift and improves stability, while providing a natural interface for constraint-aware training and regularization. Together, these design choices enhance accuracy, robustness to geometric and regime shifts, and scalability on large, irregular discretizations.
 
 ## External Aerodynamics CFD Example: Overview
 
@@ -31,7 +31,7 @@ These transformer models can use TransformerEngine from NVIDIA, as well as tenso
 
 1. Prepare the Dataset. These models use the same Zarr outputs as other models with DrivaerML. `PhysicsNeMo` has a related project to help with data processing, called [PhysicsNeMo-Curator](https://github.com/NVIDIA/physicsnemo-curator). Using `PhysicsNeMo-Curator`, the data needed to train can be setup easily. Please refer to [these instructions on getting started](https://github.com/NVIDIA/physicsnemo-curator?tab=readme-ov-file#what-is-physicsnemo-curator) with `PhysicsNeMo-Curator`. For specifics of preparing the dataset for this example, see the [download](https://github.com/NVIDIA/physicsnemo-curator/blob/main/examples/external_aerodynamics/README.md#download-drivaerml-dataset) and [preprocessing](https://github.com/NVIDIA/physicsnemo-curator/blob/main/examples/external_aerodynamics/README.md) instructions from `physicsnemo-curator`. Users should apply the preprocessing steps locally to produce `zarr` output files.
 
-2. Train your model. The model and training configuration is configured with `hydra`, and configurations are available for both surface and volume modes (e.g., `transolver_surface`, `transolver_volume`, `typhon_surface`, `typhon_volume`). Find configurations in `src/conf`, where you can control both network properties and training properties. See below for an overview and explanation of key parameters that may be of special interest.
+2. Train your model. The model and training settings are configured with `hydra`, and configurations are available for both surface and volume modes (e.g., `transolver_surface`, `transolver_volume`, `geotransolver_surface`, `geotransolver_volume`). Find configurations in `src/conf`, where you can control both network properties and training properties. See below for an overview and explanation of key parameters that may be of special interest.
 
 3. Use the trained model to perform inference. This example contains inference examples for the validation set, already in Zarr format. The `.vtp` inference pipeline is being updated to accommodate these models.
 
@@ -43,7 +43,7 @@ To train the model, first we compute normalization factors on the dataset to mak
 
 > By default, the normalization sets the mean to 0.0 and std to 1.0 of all labels in the dataset, computing the mean across the train dataset. You could adapt this to a different normalization, however take care to update both the preprocessing as well as inference scripts. Min/Max is another popular strategy.
 
-To configure your training run, use `hydra`. The config contains sections for the model, data, optimizer, and training settings. For details on the model parameters, see the API for `physicsnemo.models.transolver` and `physicsnemo.experimental.models.typhon`.
+To configure your training run, use `hydra`. The config contains sections for the model, data, optimizer, and training settings. For details on the model parameters, see the API for `physicsnemo.models.transolver` and `physicsnemo.experimental.models.geotransolver`.
 
 To fit the training into memory, you can apply on-the-fly downsampling to the data with `data.resolution=N`, where `N` is how many points per GPU to use. This dataloader will yield the full data examples in shapes of `[1, K, f]` where `K` is the resolution of the mesh, and `f` is the feature space (3 for points, normals, etc. 4 for surface fields). Downsampling happens in the preprocessing pipeline.
 
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/compute_normalizations.py b/examples/cfd/external_aerodynamics/transformer_models/src/compute_normalizations.py
index 749a7ab2f7..13dcff04f1 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/compute_normalizations.py
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/compute_normalizations.py
@@ -89,7 +89,6 @@ def compute_mean_std_min_max(
 
         # Update running mean and M2 (Welford's algorithm)
         delta = batch_mean - mean
-        N += batch_n
         mean = mean + delta * (batch_n / N)
         M2 = M2 + batch_M2 + delta**2 * (batch_n * N) / N
         time_end = time.time()
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/data/volume.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/data/volume.yaml
index 4b7fb3445d..9d34cc406a 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/data/volume.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/data/volume.yaml
@@ -20,10 +20,11 @@ defaults:
 # Overrides for volume data:
 mode: volume
 
-# volume-speficic needs:
+# volume-specific needs:
 data_keys:
   - "volume_fields"
   - "volume_mesh_centers"
   - "stl_faces"
   - "stl_centers"
-  - "stl_coordinates"
\ No newline at end of file
+  - "stl_coordinates"
+  
\ No newline at end of file
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_surface.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_surface.yaml
index 1da49c92a0..47c3f8a59d 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_surface.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_surface.yaml
@@ -21,7 +21,7 @@ defaults:
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
-run_id: "surface/bfloat16"
+run_id: "surface/float32"
 
 # Performance considerations:
 precision: float32 # float32, float16, bfloat16, or float8
@@ -34,4 +34,5 @@ data:
 # Logging configuration
 logging:
   level: INFO
-  format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
\ No newline at end of file
+  format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+  
\ No newline at end of file
diff --git a/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_volume.yaml b/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_volume.yaml
index bab0b9aedf..7d0c8eb249 100644
--- a/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_volume.yaml
+++ b/examples/cfd/external_aerodynamics/transformer_models/src/conf/transolver_volume.yaml
@@ -21,7 +21,7 @@ defaults:
 
 output_dir: "runs"
 checkpoint_dir: null  # Optional: set custom checkpoint path, defaults to output_dir
-run_id: "volume/bfloat16"
+run_id: "volume/float32"
 
 # Performance considerations:
 precision: float32 # float32, float16, bfloat16, or float8
diff --git a/test/models/geotransolver/test_context_projector.py b/test/models/geotransolver/test_context_projector.py
index 1274c10485..524fce9a44 100644
--- a/test/models/geotransolver/test_context_projector.py
+++ b/test/models/geotransolver/test_context_projector.py
@@ -14,7 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pytest
 import torch
 
 from physicsnemo.experimental.models.geotransolver.context_projector import (
@@ -26,7 +25,6 @@
 # =============================================================================
 
 
-@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
 def test_context_projector_forward(device):
     """Test ContextProjector forward pass."""
     torch.manual_seed(42)
@@ -55,33 +53,3 @@ def test_context_projector_forward(device):
     # Output shape: [Batch, Heads, Slice_num, dim_head]
     assert slice_tokens.shape == (batch_size, heads, slice_num, dim_head)
     assert not torch.isnan(slice_tokens).any()
-
-
-@pytest.mark.parametrize("device", ["cuda:0"])
-def test_context_projector_plus_mode(device):
-    """Test ContextProjector with Transolver++ mode."""
-    torch.manual_seed(42)
-
-    dim = 64
-    heads = 4
-    dim_head = 16
-    slice_num = 8
-    batch_size = 2
-    n_tokens = 100
-
-    projector = ContextProjector(
-        dim=dim,
-        heads=heads,
-        dim_head=dim_head,
-        dropout=0.0,
-        slice_num=slice_num,
-        use_te=False,
-        plus=True,
-    ).to(device)
-
-    x = torch.randn(batch_size, n_tokens, dim).to(device)
-
-    slice_tokens = projector(x)
-
-    assert slice_tokens.shape == (batch_size, heads, slice_num, dim_head)
-    assert not torch.isnan(slice_tokens).any()
diff --git a/test/models/geotransolver/test_gale.py b/test/models/geotransolver/test_gale.py
index 40ac14ffeb..aa98175075 100644
--- a/test/models/geotransolver/test_gale.py
+++ b/test/models/geotransolver/test_gale.py
@@ -27,7 +27,6 @@
 # =============================================================================
 
 
-@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
 def test_gale_forward_basic(device):
     """Test GALE attention layer forward pass without context."""
     torch.manual_seed(42)
@@ -60,7 +59,6 @@ def test_gale_forward_basic(device):
     assert not torch.isnan(outputs[0]).any()
 
 
-@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
 def test_gale_forward_with_context(device):
     """Test GALE attention layer forward pass with cross-attention context."""
     torch.manual_seed(42)
@@ -95,7 +93,6 @@ def test_gale_forward_with_context(device):
     assert not torch.isnan(outputs[0]).any()
 
 
-@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
 def test_gale_forward_multiple_inputs(device):
     """Test GALE attention layer with multiple input tensors."""
     torch.manual_seed(42)
@@ -169,7 +166,6 @@ def test_gale_plus_mode(device):
 # =============================================================================
 
 
-@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
 def test_gale_block_forward(device):
     """Test GALE_block transformer block forward pass."""
     torch.manual_seed(42)
@@ -205,7 +201,6 @@ def test_gale_block_forward(device):
     assert not torch.isnan(outputs[0]).any()
 
 
-@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
 def test_gale_block_multiple_inputs(device):
     """Test GALE_block with multiple input tensors."""
     torch.manual_seed(42)
diff --git a/test/models/geotransolver/test_geotransolver.py b/test/models/geotransolver/test_geotransolver.py
index c5eab351e8..13d970f1ba 100644
--- a/test/models/geotransolver/test_geotransolver.py
+++ b/test/models/geotransolver/test_geotransolver.py
@@ -14,7 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import random
 
 import pytest
 import torch
@@ -37,7 +36,6 @@
 # =============================================================================
 
 
-@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
 @pytest.mark.parametrize("use_geometry", [False, True])
 @pytest.mark.parametrize("use_global", [False, True])
 def test_geotransolver_forward(device, use_geometry, use_global):
@@ -88,7 +86,6 @@ def test_geotransolver_forward(device, use_geometry, use_global):
     assert not torch.isnan(outputs).any()
 
 
-@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
 def test_geotransolver_forward_tuple_inputs(device):
     """Test GeoTransolver model forward pass with tuple inputs/outputs (multi-head)."""
     torch.manual_seed(42)
@@ -189,7 +186,7 @@ def test_geotransolver_forward_with_local_features(device, pytestconfig):
 
     assert isinstance(outputs, torch.Tensor)
     assert outputs.shape == (batch_size, n_tokens, 4)
-    assert not torch.isnan(outputs[0]).any()
+    assert not torch.isnan(outputs).any()
 
 
 # =============================================================================
@@ -197,7 +194,6 @@ def test_geotransolver_forward_with_local_features(device, pytestconfig):
 # =============================================================================
 
 
-@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
 def test_geotransolver_forward_accuracy_basic(device):
     """Test GeoTransolver basic forward pass accuracy."""
     torch.manual_seed(42)
@@ -238,7 +234,6 @@ def test_geotransolver_forward_accuracy_basic(device):
     )
 
 
-@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
 def test_geotransolver_forward_accuracy_tuple(device):
     """Test GeoTransolver forward pass accuracy with tuple inputs."""
     torch.manual_seed(42)
@@ -299,6 +294,7 @@ def test_geotransolver_forward_accuracy_tuple(device):
 @pytest.mark.parametrize("device", ["cuda:0"])
 def test_geotransolver_optimizations(device):
     """Test GeoTransolver optimizations (CUDA graphs, JIT, AMP, combo)."""
+    torch.manual_seed(42)
 
     def setup_model():
         """Setup fresh GeoTransolver model and inputs for each optimization test."""
@@ -408,7 +404,7 @@ def test_geotransolver_te_basic(device, pytestconfig):
 
     assert isinstance(outputs, torch.Tensor)
     assert outputs.shape == (batch_size, n_tokens, 4)
-    assert not torch.isnan(outputs[0]).any()
+    assert not torch.isnan(outputs).any()
 
 
 # =============================================================================
@@ -416,7 +412,6 @@ def test_geotransolver_te_basic(device, pytestconfig):
 # =============================================================================
 
 
-@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
 def test_geotransolver_checkpoint(device):
     """Test GeoTransolver checkpoint save/load."""
     torch.manual_seed(42)
@@ -457,7 +452,7 @@ def test_geotransolver_checkpoint(device):
         include_local_features=False,
     ).to(device)
 
-    batch_size = random.randint(1, 2)
+    batch_size = 2
     n_tokens = 100
     n_global = 5
 
@@ -472,7 +467,6 @@ def test_geotransolver_checkpoint(device):
     )
 
 
-@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
 def test_geotransolver_checkpoint_tuple(device):
     """Test GeoTransolver checkpoint save/load with tuple inputs."""
     torch.manual_seed(42)
@@ -516,7 +510,7 @@ def test_geotransolver_checkpoint_tuple(device):
         include_local_features=False,
     ).to(device)
 
-    batch_size = random.randint(1, 2)
+    batch_size = 2
     n_tokens_1 = 100
     n_tokens_2 = 150
     n_global = 5

From 618f8e93776331290f4d796ad8d579a21c810932 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 9 Jan 2026 22:28:58 +0000
Subject: [PATCH 29/32] Update geotransolver tests.

---
 test/models/geotransolver/test_gale.py        | 70 -------------------
 .../geotransolver/test_geotransolver.py       |  2 -
 2 files changed, 72 deletions(-)

diff --git a/test/models/geotransolver/test_gale.py b/test/models/geotransolver/test_gale.py
index aa98175075..32d9913c81 100644
--- a/test/models/geotransolver/test_gale.py
+++ b/test/models/geotransolver/test_gale.py
@@ -14,7 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pytest
 import torch
 
 from physicsnemo.experimental.models.geotransolver.gale import (
@@ -129,38 +128,6 @@ def test_gale_forward_multiple_inputs(device):
     assert not torch.isnan(outputs[1]).any()
 
 
-@pytest.mark.parametrize("device", ["cuda:0"])
-def test_gale_plus_mode(device):
-    """Test GALE attention with Transolver++ mode."""
-    torch.manual_seed(42)
-
-    dim = 64
-    heads = 4
-    dim_head = 16
-    slice_num = 8
-    batch_size = 2
-    n_tokens = 100
-
-    gale = GALE(
-        dim=dim,
-        heads=heads,
-        dim_head=dim_head,
-        dropout=0.0,
-        slice_num=slice_num,
-        use_te=False,
-        plus=True,  # Enable Transolver++ features
-        context_dim=dim_head,
-    ).to(device)
-
-    x = torch.randn(batch_size, n_tokens, dim).to(device)
-
-    outputs = gale((x,), context=None)
-
-    assert len(outputs) == 1
-    assert outputs[0].shape == (batch_size, n_tokens, dim)
-    assert not torch.isnan(outputs[0]).any()
-
-
 # =============================================================================
 # GALE_block Tests
 # =============================================================================
@@ -236,40 +203,3 @@ def test_gale_block_multiple_inputs(device):
     assert len(outputs) == 2
     assert outputs[0].shape == (batch_size, n_tokens_1, hidden_dim)
     assert outputs[1].shape == (batch_size, n_tokens_2, hidden_dim)
-
-
-@pytest.mark.parametrize("device", ["cuda:0"])
-def test_gale_mixing_weight_gradient(device):
-    """Test that GALE mixing weight receives gradients."""
-    torch.manual_seed(42)
-
-    dim = 64
-    heads = 4
-    dim_head = 16
-    slice_num = 8
-    batch_size = 2
-    n_tokens = 100
-    context_tokens = 32
-    context_dim = dim_head
-
-    gale = GALE(
-        dim=dim,
-        heads=heads,
-        dim_head=dim_head,
-        dropout=0.0,
-        slice_num=slice_num,
-        use_te=False,
-        plus=False,
-        context_dim=context_dim,
-    ).to(device)
-
-    x = torch.randn(batch_size, n_tokens, dim, requires_grad=True).to(device)
-    context = torch.randn(batch_size, heads, context_tokens, context_dim).to(device)
-
-    outputs = gale((x,), context=context)
-    loss = outputs[0].sum()
-    loss.backward()
-
-    # Check that state_mixing parameter receives gradient
-    assert gale.state_mixing.grad is not None
-    assert gale.state_mixing.grad != 0
diff --git a/test/models/geotransolver/test_geotransolver.py b/test/models/geotransolver/test_geotransolver.py
index 13d970f1ba..2815b9f7bd 100644
--- a/test/models/geotransolver/test_geotransolver.py
+++ b/test/models/geotransolver/test_geotransolver.py
@@ -140,7 +140,6 @@ def test_geotransolver_forward_tuple_inputs(device):
 
 
 @requires_module("warp")
-@pytest.mark.parametrize("device", ["cuda:0"])
 def test_geotransolver_forward_with_local_features(device, pytestconfig):
     """Test GeoTransolver model forward pass with local features (BQ warp)."""
     torch.manual_seed(42)
@@ -291,7 +290,6 @@ def test_geotransolver_forward_accuracy_tuple(device):
 # =============================================================================
 
 
-@pytest.mark.parametrize("device", ["cuda:0"])
 def test_geotransolver_optimizations(device):
     """Test GeoTransolver optimizations (CUDA graphs, JIT, AMP, combo)."""
     torch.manual_seed(42)

From d8e169641c16be0368e5bf67ba3abd5cbd639347 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 9 Jan 2026 22:48:08 +0000
Subject: [PATCH 30/32] Auto device instead of cuda ...

---
 test/models/geotransolver/test_geotransolver.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/test/models/geotransolver/test_geotransolver.py b/test/models/geotransolver/test_geotransolver.py
index 2815b9f7bd..47b39a3b8a 100644
--- a/test/models/geotransolver/test_geotransolver.py
+++ b/test/models/geotransolver/test_geotransolver.py
@@ -360,11 +360,13 @@ def setup_model():
 
 
 @requires_module("transformer_engine")
-@pytest.mark.parametrize("device", ["cuda:0"])
 def test_geotransolver_te_basic(device, pytestconfig):
     """Test GeoTransolver with Transformer Engine backend."""
     torch.manual_seed(42)
 
+    if device == "cpu":
+        pytest.skip("TE Tests require cuda.")
+
     model = GeoTransolver(
         functional_dim=32,
         out_dim=4,
@@ -559,7 +561,6 @@ def test_geotransolver_mismatched_functional_out_dims():
 # =============================================================================
 
 
-@pytest.mark.parametrize("device", ["cuda:0"])
 @pytest.mark.parametrize("activation", ["gelu", "relu", "tanh", "silu"])
 def test_geotransolver_activations(device, activation):
     """Test GeoTransolver with different activation functions."""
@@ -606,7 +607,6 @@ def test_geotransolver_activations(device, activation):
 # =============================================================================
 
 
-@pytest.mark.parametrize("device", ["cuda:0"])
 @pytest.mark.parametrize("n_layers", [1, 2, 4])
 def test_geotransolver_different_depths(device, n_layers):
     """Test GeoTransolver with different numbers of layers."""
@@ -648,7 +648,6 @@ def test_geotransolver_different_depths(device, n_layers):
     assert not torch.isnan(outputs).any()
 
 
-@pytest.mark.parametrize("device", ["cuda:0"])
 @pytest.mark.parametrize("slice_num", [4, 16, 32])
 def test_geotransolver_different_slice_nums(device, slice_num):
     """Test GeoTransolver with different numbers of physical state slices."""
@@ -690,7 +689,6 @@ def test_geotransolver_different_slice_nums(device, slice_num):
     assert not torch.isnan(outputs).any()
 
 
-@pytest.mark.parametrize("device", ["cuda:0"])
 @pytest.mark.parametrize("n_hidden,n_head", [(64, 4), (128, 8), (256, 8)])
 def test_geotransolver_different_hidden_sizes(device, n_hidden, n_head):
     """Test GeoTransolver with different hidden dimensions and head counts."""

From 9ca4e68eb0a991e1b98a3cbf7e9f55c2fd2b798c Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 9 Jan 2026 22:59:59 +0000
Subject: [PATCH 31/32] Comply with model standards.

---
 .../models/geotransolver/__init__.py          |  50 +-
 .../models/geotransolver/context_projector.py | 459 ++++++++++++++----
 .../experimental/models/geotransolver/gale.py | 328 +++++++++----
 .../models/geotransolver/geotransolver.py     | 404 ++++++++++-----
 4 files changed, 929 insertions(+), 312 deletions(-)

diff --git a/physicsnemo/experimental/models/geotransolver/__init__.py b/physicsnemo/experimental/models/geotransolver/__init__.py
index febfc7b90f..2a5672e971 100644
--- a/physicsnemo/experimental/models/geotransolver/__init__.py
+++ b/physicsnemo/experimental/models/geotransolver/__init__.py
@@ -14,6 +14,52 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .geotransolver import GeoTransolver
+r"""GeoTransolver: Geometry-Aware Physics Attention Transformer.
 
-__all__ = ["GeoTransolver"]
+This module provides the GeoTransolver model and its components for learning
+physics-based representations with geometry and global context awareness.
+
+Classes
+-------
+GeoTransolver
+    Main model class combining GALE attention with geometry and global context.
+GALE
+    Geometry-Aware Latent Embeddings attention layer.
+GALE_block
+    Transformer block using GALE attention.
+ContextProjector
+    Projects context features onto physical state slices.
+GlobalContextBuilder
+    Orchestrates context construction for the model.
+
+Examples
+--------
+Basic usage:
+
+>>> import torch
+>>> from physicsnemo.experimental.models.geotransolver import GeoTransolver
+>>> model = GeoTransolver(
+...     functional_dim=64,
+...     out_dim=3,
+...     n_hidden=256,
+...     n_layers=4,
+...     use_te=False,
+... )
+>>> x = torch.randn(2, 1000, 64)
+>>> output = model(x)
+>>> output.shape
+torch.Size([2, 1000, 3])
+"""
+
+from .context_projector import ContextProjector, GlobalContextBuilder
+from .gale import GALE, GALE_block
+from .geotransolver import GeoTransolver, GeoTransolverMetaData
+
+__all__ = [
+    "GeoTransolver",
+    "GeoTransolverMetaData",
+    "GALE",
+    "GALE_block",
+    "ContextProjector",
+    "GlobalContextBuilder",
+]
\ No newline at end of file
diff --git a/physicsnemo/experimental/models/geotransolver/context_projector.py b/physicsnemo/experimental/models/geotransolver/context_projector.py
index 3a1b68da77..56bde60b06 100644
--- a/physicsnemo/experimental/models/geotransolver/context_projector.py
+++ b/physicsnemo/experimental/models/geotransolver/context_projector.py
@@ -14,16 +14,29 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Context Projector for GeoTransolver model.
-
-This module provides the ContextProjector class, which projects context features
-(geometry or global embeddings) onto learned physical state spaces for use in
-GALE attention layers.
+r"""Context Projector for GeoTransolver model.
+
+This module provides classes for projecting context features (geometry or global
+embeddings) onto learned physical state spaces for use in GALE attention layers.
+
+Classes
+-------
+ContextProjector
+    Projects context features onto physical state slices.
+GeometricFeatureProcessor
+    Processes geometric features at a single spatial scale using BQWarp.
+MultiScaleFeatureExtractor
+    Multi-scale geometric feature extraction with minimal complexity.
+GlobalContextBuilder
+    Orchestrates all context construction for the GeoTransolver model.
 """
 
+from __future__ import annotations
+
 import torch
 import torch.nn as nn
 from einops import rearrange
+from jaxtyping import Float
 
 from physicsnemo.core.version_check import check_version_spec
 from physicsnemo.models.transolver.Physics_Attention import gumbel_softmax
@@ -58,9 +71,21 @@ class ContextProjector(nn.Module):
     slice_num : int, optional
         Number of learned physical state slices. Default is 64.
     use_te : bool, optional
-        Whether to use Transformer Engine backend when available. Default is True.
+        Whether to use Transformer Engine backend when available. Default is ``True``.
     plus : bool, optional
-        Whether to use Transolver++ features. Default is False.
+        Whether to use Transolver++ features. Default is ``False``.
+
+    Forward
+    -------
+    x : torch.Tensor
+        Input tensor of shape :math:`(B, N, C)` where :math:`B` is batch size,
+        :math:`N` is number of tokens, and :math:`C` is number of channels.
+
+    Outputs
+    -------
+    torch.Tensor
+        Slice tokens of shape :math:`(B, H, S, D)` where :math:`H` is number of heads,
+        :math:`S` is number of slices, and :math:`D` is head dimension.
 
     Notes
     -----
@@ -69,8 +94,17 @@ class ContextProjector(nn.Module):
 
     See Also
     --------
-    :class:`GALE` : Full GALE attention layer that uses these projected context features.
-    :class:`GeoTransolver` : Main model that uses ContextProjector for geometry and global embeddings.
+    :class:`~physicsnemo.experimental.models.geotransolver.gale.GALE` : Full GALE attention layer that uses these projected context features.
+    :class:`~physicsnemo.experimental.models.geotransolver.GeoTransolver` : Main model that uses ContextProjector for geometry and global embeddings.
+
+    Examples
+    --------
+    >>> import torch
+    >>> projector = ContextProjector(dim=64, heads=8, dim_head=32, slice_num=32)
+    >>> x = torch.randn(2, 100, 64)  # (batch, tokens, features)
+    >>> slice_tokens = projector(x)
+    >>> slice_tokens.shape
+    torch.Size([2, 8, 32, 32])
     """
 
     def __init__(
@@ -82,7 +116,7 @@ def __init__(
         slice_num: int = 64,
         use_te: bool = True,
         plus: bool = False,
-    ):
+    ) -> None:
         super().__init__()
         inner_dim = dim_head * heads
         self.dim_head = dim_head
@@ -94,7 +128,7 @@ def __init__(
         # Choose linear layer implementation based on backend
         linear_layer = te.Linear if (use_te and TE_AVAILABLE) else nn.Linear
 
-        # Input projection layers
+        # Input projection layers for query and key
         self.in_project_x = linear_layer(dim, inner_dim)
         if not plus:
             self.in_project_fx = linear_layer(dim, inner_dim)
@@ -113,35 +147,41 @@ def __init__(
                 nn.GELU(),
             )
 
-        # Slice projection layer
+        # Slice projection layer maps from head dimension to slice space
         self.in_project_slice = linear_layer(dim_head, slice_num)
 
     def project_input_onto_slices(
-        self, x: torch.Tensor
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        self, x: Float[torch.Tensor, "batch tokens channels"]
+    ) -> (
+        Float[torch.Tensor, "batch heads tokens dim"]
+        | tuple[
+            Float[torch.Tensor, "batch heads tokens dim"],
+            Float[torch.Tensor, "batch heads tokens dim"],
+        ]
+    ):
         r"""Project the input onto the slice space.
 
         Parameters
         ----------
         x : torch.Tensor
-            Input tensor of shape \((B, N, C)\) where \(B\) is batch size,
-            \(N\) is number of tokens, and \(C\) is number of channels.
+            Input tensor of shape :math:`(B, N, C)` where :math:`B` is batch size,
+            :math:`N` is number of tokens, and :math:`C` is number of channels.
 
         Returns
         -------
         torch.Tensor or tuple[torch.Tensor, torch.Tensor]
-            If ``plus=True``, returns single tensor of shape \((B, H, N, D)\) where
-            \(H\) is number of heads and \(D\) is head dimension. If ``plus=False``,
-            returns tuple of two tensors both of shape \((B, H, N, D)\), representing
+            If ``plus=True``, returns single tensor of shape :math:`(B, H, N, D)` where
+            :math:`H` is number of heads and :math:`D` is head dimension. If ``plus=False``,
+            returns tuple of two tensors both of shape :math:`(B, H, N, D)`, representing
             the query and key projections respectively.
         """
-        # Project input to multi-head representation
+        # Project input to multi-head representation: (B, N, C) -> (B, H, N, D)
         projected_x = rearrange(
             self.in_project_x(x), "B N (h d) -> B h N d", h=self.heads, d=self.dim_head
         )
 
         if self.plus:
-            # Transolver++ uses single projection
+            # Transolver++ uses single projection for both paths
             return projected_x
         else:
             # Standard Transolver uses separate query and key projections
@@ -154,27 +194,32 @@ def project_input_onto_slices(
             return projected_x, feature_projection
 
     def compute_slices_from_projections(
-        self, slice_projections: torch.Tensor, fx: torch.Tensor
-    ) -> tuple[torch.Tensor, torch.Tensor]:
+        self,
+        slice_projections: Float[torch.Tensor, "batch heads tokens slices"],
+        fx: Float[torch.Tensor, "batch heads tokens dim"],
+    ) -> tuple[
+        Float[torch.Tensor, "batch heads tokens slices"],
+        Float[torch.Tensor, "batch heads slices dim"],
+    ]:
         r"""Compute slice weights and slice tokens from input projections and latent features.
 
         Parameters
         ----------
         slice_projections : torch.Tensor
-            Projected input tensor of shape \((B, H, N, S)\) where \(B\) is batch size,
-            \(H\) is number of heads, \(N\) is number of tokens, and \(S\) is number of
+            Projected input tensor of shape :math:`(B, H, N, S)` where :math:`B` is batch size,
+            :math:`H` is number of heads, :math:`N` is number of tokens, and :math:`S` is number of
             slices, representing the projection of each token onto each slice for each
             attention head.
         fx : torch.Tensor
-            Latent feature tensor of shape \((B, H, N, D)\) where \(D\) is head dimension,
+            Latent feature tensor of shape :math:`(B, H, N, D)` where :math:`D` is head dimension,
             representing the learned states to be aggregated by the slice weights.
 
         Returns
         -------
         tuple[torch.Tensor, torch.Tensor]
-            - ``slice_weights``: Tensor of shape \((B, H, N, S)\), normalized weights for
+            - ``slice_weights``: Tensor of shape :math:`(B, H, N, S)`, normalized weights for
               each slice per token and head.
-            - ``slice_token``: Tensor of shape \((B, H, S, D)\), aggregated latent features
+            - ``slice_token``: Tensor of shape :math:`(B, H, S, D)`, aggregated latent features
               for each slice, head, and batch.
 
         Notes
@@ -204,15 +249,19 @@ def compute_slices_from_projections(
         # Ensure weights match the computation dtype
         slice_weights = slice_weights.to(slice_projections.dtype)
 
-        # Aggregate features by slice weights
+        # Aggregate features by slice weights with normalization
         # Normalize first to prevent overflow in reduced precision
-        slice_norm = slice_weights.sum(2)  # Sum over tokens: [B, H, S]
+        slice_norm = slice_weights.sum(2)  # Sum over tokens: (B, H, S)
         normed_weights = slice_weights / (slice_norm[:, :, None, :] + 1e-2)
+
+        # Weighted aggregation: (B, H, S, N) @ (B, H, N, D) -> (B, H, S, D)
         slice_token = torch.matmul(normed_weights.transpose(2, 3), fx)
 
         return slice_weights, slice_token
 
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
+    def forward(
+        self, x: Float[torch.Tensor, "batch tokens channels"]
+    ) -> Float[torch.Tensor, "batch heads slices dim"]:
         r"""Project inputs to physical state slices.
 
         This performs a partial physics attention operation: it projects the input onto
@@ -222,14 +271,14 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         Parameters
         ----------
         x : torch.Tensor
-            Input tensor of shape \((B, N, C)\) where \(B\) is batch size, \(N\) is
-            number of tokens, and \(C\) is number of channels.
+            Input tensor of shape :math:`(B, N, C)` where :math:`B` is batch size, :math:`N` is
+            number of tokens, and :math:`C` is number of channels.
 
         Returns
         -------
         torch.Tensor
-            Slice tokens of shape \((B, H, S, D)\) where \(H\) is number of heads,
-            \(S\) is number of slices, and \(D\) is head dimension.
+            Slice tokens of shape :math:`(B, H, S, D)` where :math:`H` is number of heads,
+            :math:`S` is number of slices, and :math:`D` is head dimension.
 
         Notes
         -----
@@ -237,6 +286,14 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         The slice tokens capture learned physical state representations that are used
         as cross-attention context throughout the model.
         """
+        ### Input validation
+        if not torch.compiler.is_compiling():
+            if x.ndim != 3:
+                raise ValueError(
+                    f"Expected 3D input tensor (B, N, C), "
+                    f"got {x.ndim}D tensor with shape {tuple(x.shape)}"
+                )
+
         # Project inputs onto learned latent spaces
         if self.plus:
             projected_x = self.project_input_onto_slices(x)
@@ -245,7 +302,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         else:
             projected_x, feature_projection = self.project_input_onto_slices(x)
 
-        # Project latent representations onto physical state slices
+        # Project latent representations onto physical state slices: (B, H, N, D) -> (B, H, N, S)
         slice_projections = self.in_project_slice(projected_x)
 
         # Compute weighted aggregation of features into slice tokens
@@ -273,6 +330,37 @@ class GeometricFeatureProcessor(nn.Module):
         Dimension of the input features to query.
     hidden_dim : int
         Output dimension after MLP processing.
+
+    Forward
+    -------
+    query_points : torch.Tensor
+        Query coordinates of shape :math:`(B, N, 3)` where :math:`B` is batch size
+        and :math:`N` is number of query points.
+    key_features : torch.Tensor
+        Features to query from of shape :math:`(B, N, C)` where :math:`C` is
+        ``feature_dim``.
+
+    Outputs
+    -------
+    torch.Tensor
+        Processed features of shape :math:`(B, N, D)` where :math:`D` is ``hidden_dim``.
+
+    See Also
+    --------
+    :class:`MultiScaleFeatureExtractor` : Uses multiple GeometricFeatureProcessor instances.
+    :class:`~physicsnemo.nn.ball_query.BQWarp` : The ball query operation used internally.
+
+    Examples
+    --------
+    >>> import torch
+    >>> processor = GeometricFeatureProcessor(
+    ...     radius=0.1, neighbors_in_radius=16, feature_dim=3, hidden_dim=64
+    ... )
+    >>> query_points = torch.randn(2, 100, 3)  # (batch, points, xyz)
+    >>> key_features = torch.randn(2, 100, 3)  # (batch, points, features)
+    >>> output = processor(query_points, key_features)
+    >>> output.shape
+    torch.Size([2, 100, 64])
     """
 
     def __init__(
@@ -281,10 +369,13 @@ def __init__(
         neighbors_in_radius: int,
         feature_dim: int,
         hidden_dim: int,
-    ):
+    ) -> None:
         super().__init__()
 
+        # Ball query for neighbor search within radius
         self.bq_warp = BQWarp(radius=radius, neighbors_in_radius=neighbors_in_radius)
+
+        # MLP to process flattened neighbor features
         self.mlp = Mlp(
             in_features=feature_dim * neighbors_in_radius,
             hidden_features=[hidden_dim, hidden_dim // 2],
@@ -294,28 +385,47 @@ def __init__(
         )
 
     def forward(
-        self, query_points: torch.Tensor, key_features: torch.Tensor
-    ) -> torch.Tensor:
+        self,
+        query_points: Float[torch.Tensor, "batch points spatial_dim"],
+        key_features: Float[torch.Tensor, "batch points features"],
+    ) -> Float[torch.Tensor, "batch points hidden_dim"]:
         r"""Query neighbors and process features.
 
         Parameters
         ----------
         query_points : torch.Tensor
-            Query coordinates of shape \((B, N, 3)\).
+            Query coordinates of shape :math:`(B, N, 3)` where :math:`B` is batch size
+            and :math:`N` is number of query points.
         key_features : torch.Tensor
-            Features to query from of shape \((B, N, C)\).
+            Features to query from of shape :math:`(B, N, C)` where :math:`C` is the
+            feature dimension.
 
         Returns
         -------
         torch.Tensor
-            Processed features of shape \((B, N, hidden_dim)\).
+            Processed features of shape :math:`(B, N, D)` where :math:`D` is the
+            hidden dimension.
         """
-        # print(f"query_points shape: {query_points.shape}")
-        # print(f"key_features shape: {key_features.shape}")
-        
+        ### Input validation
+        if not torch.compiler.is_compiling():
+            if query_points.ndim != 3:
+                raise ValueError(
+                    f"Expected 3D query_points tensor (B, N, 3), "
+                    f"got {query_points.ndim}D tensor with shape {tuple(query_points.shape)}"
+                )
+            if key_features.ndim != 3:
+                raise ValueError(
+                    f"Expected 3D key_features tensor (B, N, C), "
+                    f"got {key_features.ndim}D tensor with shape {tuple(key_features.shape)}"
+                )
+
+        # Query neighbors within radius: (B, N, K, C)
         _, neighbors = self.bq_warp(query_points, key_features)
-        b, n, k, c = neighbors.shape
+
+        # Flatten neighbor features for MLP: (B, N, K, C) -> (B, N, K*C)
         neighbors_flat = rearrange(neighbors, "b n k c -> b n (k c)")
+
+        # Process through MLP with tanh activation for bounded output
         return torch.nn.functional.tanh(self.mlp(neighbors_flat))
 
 
@@ -332,7 +442,7 @@ class MultiScaleFeatureExtractor(nn.Module):
     radii : list[float]
         Radii for multi-scale processing.
     neighbors_in_radius : list[int]
-        Neighbors per radius.
+        Neighbors per radius (must have same length as ``radii``).
     hidden_dim : int
         Hidden dimension for processing.
     n_head : int
@@ -342,11 +452,44 @@ class MultiScaleFeatureExtractor(nn.Module):
     dropout : float, optional
         Dropout rate. Default is 0.0.
     slice_num : int, optional
-        Number of slices. Default is 64.
+        Number of slices for context tokenization. Default is 64.
     use_te : bool, optional
-        Use Transformer Engine. Default is True.
+        Whether to use Transformer Engine. Default is ``True``.
     plus : bool, optional
-        Use Transolver++. Default is False.
+        Whether to use Transolver++ features. Default is ``False``.
+
+    Forward
+    -------
+    This class does not implement a standard ``forward`` method. Instead, use:
+
+    - :meth:`extract_context_features`: Get tokenized features for GALE context.
+    - :meth:`extract_local_features`: Get concatenated features for local pathway.
+
+    See Also
+    --------
+    :class:`GeometricFeatureProcessor` : Single-scale processor used by this class.
+    :class:`ContextProjector` : Tokenizer used for context features.
+    :class:`GlobalContextBuilder` : High-level builder that uses this class.
+
+    Examples
+    --------
+    >>> import torch
+    >>> extractor = MultiScaleFeatureExtractor(
+    ...     geometry_dim=3,
+    ...     radii=[0.05, 0.25],
+    ...     neighbors_in_radius=[8, 32],
+    ...     hidden_dim=32,
+    ...     n_head=8,
+    ...     dim_head=32,
+    ... )
+    >>> spatial_coords = torch.randn(2, 100, 3)
+    >>> geometry = torch.randn(2, 100, 3)
+    >>> context_feats = extractor.extract_context_features(spatial_coords, geometry)
+    >>> len(context_feats)  # One per scale
+    2
+    >>> local_feats = extractor.extract_local_features(spatial_coords, geometry)
+    >>> local_feats.shape  # Concatenated across scales
+    torch.Size([2, 100, 64])
     """
 
     def __init__(
@@ -361,39 +504,75 @@ def __init__(
         slice_num: int = 64,
         use_te: bool = True,
         plus: bool = False,
-    ):
+    ) -> None:
         super().__init__()
         self.num_scales = len(radii)
 
-        # One processor per scale - simple and reusable
+        # One processor per scale for geometric feature extraction
         self.processors = nn.ModuleList(
             [
-                GeometricFeatureProcessor(radii[i], neighbors_in_radius[i], geometry_dim, hidden_dim)
+                GeometricFeatureProcessor(
+                    radii[i], neighbors_in_radius[i], geometry_dim, hidden_dim
+                )
                 for i in range(self.num_scales)
             ]
         )
 
-        # One tokenizer per scale for context features
+        # One tokenizer per scale for projecting to context space
         self.tokenizers = nn.ModuleList(
             [
-                ContextProjector(hidden_dim, n_head, dim_head, dropout, slice_num, use_te, plus)
+                ContextProjector(
+                    hidden_dim, n_head, dim_head, dropout, slice_num, use_te, plus
+                )
                 for _ in range(self.num_scales)
             ]
         )
 
     def extract_context_features(
-        self, spatial_coords: torch.Tensor, geometry: torch.Tensor
-    ) -> list[torch.Tensor]:
-        r"""Extract and tokenize features for context."""
+        self,
+        spatial_coords: Float[torch.Tensor, "batch points spatial_dim"],
+        geometry: Float[torch.Tensor, "batch points geometry_dim"],
+    ) -> list[Float[torch.Tensor, "batch heads slices dim"]]:
+        r"""Extract and tokenize features for context.
+
+        Parameters
+        ----------
+        spatial_coords : torch.Tensor
+            Spatial coordinates of shape :math:`(B, N, 3)`.
+        geometry : torch.Tensor
+            Geometry features of shape :math:`(B, N, C_{geo})`.
+
+        Returns
+        -------
+        list[torch.Tensor]
+            List of tokenized context features, one per scale, each of shape
+            :math:`(B, H, S, D)`.
+        """
         return [
             tokenizer(processor(spatial_coords, geometry))
             for processor, tokenizer in zip(self.processors, self.tokenizers)
         ]
 
     def extract_local_features(
-        self, spatial_coords: torch.Tensor, geometry: torch.Tensor
-    ) -> torch.Tensor:
-        r"""Extract and concatenate features for local pathway."""
+        self,
+        spatial_coords: Float[torch.Tensor, "batch points spatial_dim"],
+        geometry: Float[torch.Tensor, "batch points geometry_dim"],
+    ) -> Float[torch.Tensor, "batch points total_hidden"]:
+        r"""Extract and concatenate features for local pathway.
+
+        Parameters
+        ----------
+        spatial_coords : torch.Tensor
+            Spatial coordinates of shape :math:`(B, N, 3)`.
+        geometry : torch.Tensor
+            Geometry features of shape :math:`(B, N, C_{geo})`.
+
+        Returns
+        -------
+        torch.Tensor
+            Concatenated local features of shape :math:`(B, N, D_{total})` where
+            :math:`D_{total}` is ``hidden_dim * num_scales``.
+        """
         return torch.cat(
             [processor(geometry, spatial_coords) for processor in self.processors],
             dim=-1,
@@ -404,20 +583,23 @@ class GlobalContextBuilder(nn.Module):
     r"""Orchestrates all context construction with a clean, simple interface.
 
     Manages geometry tokenization, global embedding tokenization, and optional
-    multi-scale local features.
+    multi-scale local features. This is the main entry point for building context
+    in the GeoTransolver model.
 
     Parameters
     ----------
     functional_dims : tuple[int, ...]
         Dimensions of each functional input type.
     geometry_dim : int | None, optional
-        Geometry feature dimension. Default is None.
+        Geometry feature dimension. If ``None``, geometry context is disabled.
+        Default is ``None``.
     global_dim : int | None, optional
-        Global embedding dimension. Default is None.
+        Global embedding dimension. If ``None``, global context is disabled.
+        Default is ``None``.
     radii : list[float], optional
-        Radii for local features. Default is [0.05, 0.25].
+        Radii for local features. Default is ``[0.05, 0.25]``.
     neighbors_in_radius : list[int], optional
-        Neighbors per radius. Default is [8, 32].
+        Neighbors per radius. Default is ``[8, 32]``.
     n_hidden_local : int, optional
         Hidden dim for local features. Default is 32.
     n_hidden : int, optional
@@ -427,13 +609,43 @@ class GlobalContextBuilder(nn.Module):
     dropout : float, optional
         Dropout rate. Default is 0.0.
     slice_num : int, optional
-        Number of slices. Default is 32.
+        Number of slices for tokenization. Default is 32.
     use_te : bool, optional
-        Use Transformer Engine. Default is True.
+        Whether to use Transformer Engine. Default is ``True``.
     plus : bool, optional
-        Use Transolver++. Default is False.
+        Whether to use Transolver++ features. Default is ``False``.
     include_local_features : bool, optional
-        Enable local feature extraction. Default is False.
+        Enable local feature extraction. Default is ``False``.
+
+    Forward
+    -------
+    This class does not implement a standard ``forward`` method. Instead, use
+    :meth:`build_context` to construct context and local features.
+
+    See Also
+    --------
+    :class:`ContextProjector` : Used for tokenizing geometry and global embeddings.
+    :class:`MultiScaleFeatureExtractor` : Used for multi-scale local features.
+    :class:`~physicsnemo.experimental.models.geotransolver.GeoTransolver` : Main model that uses this builder.
+
+    Examples
+    --------
+    >>> import torch
+    >>> builder = GlobalContextBuilder(
+    ...     functional_dims=(64,),
+    ...     geometry_dim=3,
+    ...     global_dim=16,
+    ...     n_hidden=256,
+    ...     n_head=8,
+    ... )
+    >>> local_embeddings = (torch.randn(2, 100, 64),)
+    >>> geometry = torch.randn(2, 100, 3)
+    >>> global_embedding = torch.randn(2, 1, 16)
+    >>> context, local_feats = builder.build_context(
+    ...     local_embeddings, None, geometry, global_embedding
+    ... )
+    >>> context.shape
+    torch.Size([2, 8, 32, 64])
     """
 
     def __init__(
@@ -441,8 +653,8 @@ def __init__(
         functional_dims: tuple[int, ...],
         geometry_dim: int | None = None,
         global_dim: int | None = None,
-        radii: list[float] = [0.05, 0.25],
-        neighbors_in_radius: list[int] = [8, 32],
+        radii: list[float] | None = None,
+        neighbors_in_radius: list[int] | None = None,
         n_hidden_local: int = 32,
         n_hidden: int = 256,
         n_head: int = 8,
@@ -451,9 +663,15 @@ def __init__(
         use_te: bool = True,
         plus: bool = False,
         include_local_features: bool = False,
-    ):
+    ) -> None:
         super().__init__()
 
+        # Set defaults for mutable arguments
+        if radii is None:
+            radii = [0.05, 0.25]
+        if neighbors_in_radius is None:
+            neighbors_in_radius = [8, 32]
+
         dim_head = n_hidden // n_head
         context_dim = 0
 
@@ -462,8 +680,16 @@ def __init__(
             self.local_extractors = nn.ModuleList(
                 [
                     MultiScaleFeatureExtractor(
-                        geometry_dim, radii, neighbors_in_radius, n_hidden_local,
-                        n_head, dim_head, dropout, slice_num, use_te, plus
+                        geometry_dim,
+                        radii,
+                        neighbors_in_radius,
+                        n_hidden_local,
+                        n_head,
+                        dim_head,
+                        dropout,
+                        slice_num,
+                        use_te,
+                        plus,
                     )
                     for _ in functional_dims
                 ]
@@ -472,7 +698,7 @@ def __init__(
         else:
             self.local_extractors = None
 
-        # Geometry tokenizer
+        # Geometry tokenizer for global geometry context
         if geometry_dim is not None:
             self.geometry_tokenizer = ContextProjector(
                 geometry_dim, n_head, dim_head, dropout, slice_num, use_te, plus
@@ -481,7 +707,7 @@ def __init__(
         else:
             self.geometry_tokenizer = None
 
-        # Global tokenizer
+        # Global embedding tokenizer
         if global_dim is not None:
             self.global_tokenizer = ContextProjector(
                 global_dim, n_head, dim_head, dropout, slice_num, use_te, plus
@@ -493,59 +719,97 @@ def __init__(
         self._context_dim = context_dim
 
     def get_context_dim(self) -> int:
-        r"""Return total context dimension."""
+        r"""Return total context dimension.
+
+        Returns
+        -------
+        int
+            Total dimension of the concatenated context features.
+        """
         return self._context_dim
 
     def build_context(
         self,
-        local_embeddings: tuple[torch.Tensor, ...],
-        local_positions: tuple[torch.Tensor, ...],
-        geometry: torch.Tensor | None = None,
-        global_embedding: torch.Tensor | None = None,
-    ) -> tuple[torch.Tensor | None, list[torch.Tensor] | None]:
+        local_embeddings: tuple[Float[torch.Tensor, "batch tokens features"], ...],
+        local_positions: (
+            tuple[Float[torch.Tensor, "batch tokens spatial_dim"], ...] | None
+        ),
+        geometry: Float[torch.Tensor, "batch tokens geometry_dim"] | None = None,
+        global_embedding: Float[torch.Tensor, "batch global_tokens global_dim"]
+        | None = None,
+    ) -> tuple[
+        Float[torch.Tensor, "batch heads slices context_dim"] | None,
+        list[Float[torch.Tensor, "batch tokens local_features"]] | None,
+    ]:
         r"""Build all context and local features.
 
         Parameters
         ----------
         local_embeddings : tuple[torch.Tensor, ...]
-            Input embeddings, each of shape \((B, N, C_i)\).
-        local_positions : tuple[torch.Tensor, ...] | None, optional
-            Local positions, each of shape \((B, N, 3)\). These are used to query the neighbors.
+            Input embeddings, each of shape :math:`(B, N, C_i)` where :math:`B` is
+            batch size, :math:`N` is number of tokens, and :math:`C_i` is the feature
+            dimension for input type :math:`i`.
+        local_positions : tuple[torch.Tensor, ...] | None
+            Local positions, each of shape :math:`(B, N, 3)`. These are used to query
+            neighbors for local features. Required if ``include_local_features=True``.
         geometry : torch.Tensor | None, optional
-            Geometry of shape \((B, N, C_{geo})\). Default is None.
+            Geometry features of shape :math:`(B, N, C_{geo})`. Default is ``None``.
         global_embedding : torch.Tensor | None, optional
-            Global embedding of shape \((B, N_g, C_g)\). Default is None.
+            Global embedding of shape :math:`(B, N_g, C_g)`. Default is ``None``.
 
         Returns
         -------
         tuple[torch.Tensor | None, list[torch.Tensor] | None]
-            Context tensor and local features list.
+            - ``context``: Concatenated context tensor of shape :math:`(B, H, S, D_c)`
+              where :math:`D_c` is the total context dimension, or ``None`` if no
+              context sources are provided.
+            - ``local_features``: List of local feature tensors, one per input type,
+              each of shape :math:`(B, N, D_l)`, or ``None`` if local features are
+              disabled.
+
+        Raises
+        ------
+        ValueError
+            If ``local_embeddings`` is empty or not all 3D, or if ``local_positions`` is ``None`` but local features are enabled.
         """
+        ### Input validation
+        if not torch.compiler.is_compiling():
+            if len(local_embeddings) == 0:
+                raise ValueError("Expected non-empty tuple of local embeddings")
+            for i, emb in enumerate(local_embeddings):
+                if emb.ndim != 3:
+                    raise ValueError(
+                        f"Expected 3D local_embedding tensor (B, N, C) at index {i}, "
+                        f"got {emb.ndim}D tensor with shape {tuple(emb.shape)}"
+                    )
+
         context_parts = []
         local_features = None
 
         if local_positions is None and self.local_extractors is not None:
-            raise ValueError("Local positions are required if local features are enabled.")
+            raise ValueError(
+                "Local positions are required if local features are enabled."
+            )
 
         # Extract multi-scale features if enabled
         if self.local_extractors is not None and geometry is not None:
             local_features = []
             for i, embedding in enumerate(local_embeddings):
                 spatial_coords = local_positions[i]  # Extract coordinates
-                
-                # Get tokenized context features
+
+                # Get tokenized context features from multi-scale extractor
                 context_feats = self.local_extractors[i].extract_context_features(
                     spatial_coords, geometry
                 )
                 context_parts.extend(context_feats)
-                
-                # Get concatenated local features
+
+                # Get concatenated local features for skip connection
                 local_feats = self.local_extractors[i].extract_local_features(
                     spatial_coords, geometry
                 )
                 local_features.append(local_feats)
 
-        # Tokenize geometry
+        # Tokenize geometry features
         if self.geometry_tokenizer is not None and geometry is not None:
             context_parts.append(self.geometry_tokenizer(geometry))
 
@@ -553,8 +817,7 @@ def build_context(
         if self.global_tokenizer is not None and global_embedding is not None:
             context_parts.append(self.global_tokenizer(global_embedding))
 
-        # Concatenate all context
+        # Concatenate all context features along the last dimension
         context = torch.cat(context_parts, dim=-1) if context_parts else None
 
-        return context, local_features
-
+        return context, local_features
\ No newline at end of file
diff --git a/physicsnemo/experimental/models/geotransolver/gale.py b/physicsnemo/experimental/models/geotransolver/gale.py
index 82c78c9991..75461f6e64 100644
--- a/physicsnemo/experimental/models/geotransolver/gale.py
+++ b/physicsnemo/experimental/models/geotransolver/gale.py
@@ -14,30 +14,33 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from dataclasses import dataclass
-from collections.abc import Sequence
+"""GALE (Geometry-Aware Latent Embeddings) attention layer and transformer block.
+
+This module provides the GALE attention mechanism and GALE_block transformer block,
+which extend the Transolver physics attention with cross-attention capabilities for
+geometry and global context embeddings.
+"""
+
+from __future__ import annotations
 
 import torch
 import torch.nn as nn
 from einops import rearrange
-import torch.nn.functional as F
+from jaxtyping import Float
 
 import physicsnemo  # noqa: F401 for docs
 from physicsnemo.core.version_check import check_version_spec
 from physicsnemo.models.transolver.Physics_Attention import (
     PhysicsAttentionIrregularMesh,
-    gumbel_softmax,
 )
 from physicsnemo.models.transolver.transolver import MLP
 
-from physicsnemo.core.meta import ModelMetaData
-from physicsnemo.core.module import Module
-
 # Check optional dependency availability
 TE_AVAILABLE = check_version_spec("transformer_engine", "0.1.0", hard_fail=False)
 if TE_AVAILABLE:
     import transformer_engine.pytorch as te
 
+
 class GALE(PhysicsAttentionIrregularMesh):
     r"""Geometry-Aware Latent Embeddings (GALE) attention layer.
 
@@ -65,21 +68,49 @@ class GALE(PhysicsAttentionIrregularMesh):
     context_dim : int, optional
         Dimension of the context vector for cross-attention. Default is 0.
 
+    Forward
+    -------
+    x : tuple[torch.Tensor, ...]
+        Tuple of input tensors, each of shape :math:`(B, N, C)` where :math:`B` is
+        batch size, :math:`N` is number of tokens, and :math:`C` is number of channels.
+    context : torch.Tensor | None, optional
+        Context tensor for cross-attention of shape :math:`(B, H, S_c, D_c)` where
+        :math:`H` is number of heads, :math:`S_c` is number of context slices, and
+        :math:`D_c` is context dimension. If ``None``, only self-attention is applied.
+        Default is ``None``.
+
+    Outputs
+    -------
+    list[torch.Tensor]
+        List of output tensors, each of shape :math:`(B, N, C)`, same shape as inputs.
+
     Notes
     -----
     The mixing between self-attention and cross-attention is controlled by a learnable
     parameter ``state_mixing`` which is passed through a sigmoid function to ensure
-    the mixing weight stays in \([0, 1]\).
+    the mixing weight stays in :math:`[0, 1]`.
 
     See Also
     --------
     :class:`physicsnemo.models.transolver.Physics_Attention.PhysicsAttentionIrregularMesh` : Base physics attention class.
     :class:`GALE_block` : Transformer block using GALE attention.
+
+    Examples
+    --------
+    >>> import torch
+    >>> gale = GALE(dim=256, heads=8, dim_head=32, context_dim=32)
+    >>> x = (torch.randn(2, 100, 256),)  # Single input tensor in tuple
+    >>> context = torch.randn(2, 8, 64, 32)  # Context for cross-attention
+    >>> outputs = gale(x, context)
+    >>> len(outputs)
+    1
+    >>> outputs[0].shape
+    torch.Size([2, 100, 256])
     """
 
     def __init__(
         self,
-        dim,
+        dim: int,
         heads: int = 8,
         dim_head: int = 64,
         dropout: float = 0.0,
@@ -87,49 +118,55 @@ def __init__(
         use_te: bool = True,
         plus: bool = False,
         context_dim: int = 0,
-    ):
+    ) -> None:
         super().__init__(dim, heads, dim_head, dropout, slice_num, use_te, plus)
 
         linear_layer = te.Linear if self.use_te else nn.Linear
 
-        # We have additional parameters, here:
+        # Cross-attention projection layers for context integration
         self.cross_q = linear_layer(dim_head, dim_head)
         self.cross_k = linear_layer(context_dim, dim_head)
         self.cross_v = linear_layer(context_dim, dim_head)
 
-        # This is the learnable mixing weight between self and cross attention.
-        # We start near 0.0 since it is passed through a sigmoid to keep the
-        # mixing weight between 0 and 1.
+        # Learnable mixing weight between self and cross attention
+        # Initialized at exactly 0.0: sigmoid(0) = 0.5 gives balanced initial mixing
         self.state_mixing = nn.Parameter(torch.tensor(0.0))
 
     def compute_slice_attention_cross(
-        self, slice_tokens: torch.Tensor, context: torch.Tensor
-    ) -> torch.Tensor:
+        self,
+        slice_tokens: list[Float[torch.Tensor, "batch heads slices dim"]],
+        context: Float[torch.Tensor, "batch heads context_slices context_dim"],
+    ) -> list[Float[torch.Tensor, "batch heads slices dim"]]:
         r"""Compute cross-attention between slice tokens and context.
 
         Parameters
         ----------
-        slice_tokens : torch.Tensor
-            Slice tokens of shape \((B, H, N, D)\) where \(B\) is batch size, \(H\) is number of heads, \(N\) is number of slices, and \(D\) is head dimension.
+        slice_tokens : list[torch.Tensor]
+            List of slice token tensors, each of shape :math:`(B, H, N, D)` where
+            :math:`B` is batch size, :math:`H` is number of heads, :math:`N` is
+            number of slices, and :math:`D` is head dimension.
         context : torch.Tensor
-            Context tensor of shape \((B, H, N_c, D_c)\) where \(N_c\) is number of context slices and \(D_c\) is context dimension.
+            Context tensor of shape :math:`(B, H, N_c, D_c)` where :math:`N_c` is
+            number of context slices and :math:`D_c` is context dimension.
 
         Returns
         -------
-        torch.Tensor
-            Cross-attention output of shape \((B, H, N, D)\).
+        list[torch.Tensor]
+            List of cross-attention outputs, each of shape :math:`(B, H, N, D)`.
         """
+        # Concatenate all slice tokens for batched projection
+        q_input = torch.cat(slice_tokens, dim=-2)  # (B, H, total_slices, D)
 
-        # Project the slice and context tokens:
+        # Project queries from slice tokens
+        q = self.cross_q(q_input)  # (B, H, total_slices, D)
 
-        q_input = torch.cat(slice_tokens, dim=-2)
-        q = self.cross_q(q_input)
-        
-        k = self.cross_k(context)
-        v = self.cross_v(context)
+        # Project keys and values from context
+        k = self.cross_k(context)  # (B, H, N_c, D)
+        v = self.cross_v(context)  # (B, H, N_c, D)
 
-        # Compute the attention:
+        # Compute cross-attention using appropriate backend
         if self.use_te:
+            # Transformer Engine expects (B, S, H, D) format
             q = rearrange(q, "b h s d -> b s h d")
             k = rearrange(k, "b h s d -> b s h d")
             v = rearrange(v, "b h s d -> b s h d")
@@ -138,82 +175,114 @@ def compute_slice_attention_cross(
                 cross_attention, "b s (h d) -> b h s d", h=self.heads, d=self.dim_head
             )
         else:
+            # Use PyTorch's scaled dot-product attention
             cross_attention = torch.nn.functional.scaled_dot_product_attention(
                 q, k, v, is_causal=False
             )
-        cross_attention = torch.split(cross_attention, slice_tokens[0].shape[-2], dim=-2)
 
+        # Split back into individual slice token outputs
+        cross_attention = torch.split(
+            cross_attention, slice_tokens[0].shape[-2], dim=-2
+        )
 
-        return cross_attention
+        return list(cross_attention)
 
     def forward(
-        self, x: tuple[torch.Tensor, ...], context: tuple[torch.Tensor, ...] | None = None
-    ) -> torch.Tensor:
+        self,
+        x: tuple[Float[torch.Tensor, "batch tokens channels"], ...],
+        context: Float[torch.Tensor, "batch heads context_slices context_dim"]
+        | None = None,
+    ) -> list[Float[torch.Tensor, "batch tokens channels"]]:
         r"""Forward pass of the GALE module.
 
+        Applies physics-aware self-attention combined with optional cross-attention
+        to geometry and global context.
+
         Parameters
         ----------
-        x : torch.Tensor
-            Input tensor of shape \((B, N, C)\) where \(B\) is batch size, \(N\) is number of tokens, and \(C\) is number of channels.
-        context : torch.Tensor, optional
-            Context tensor for cross-attention of shape \((B, H, S_c, D_c)\) where \(H\) is number of heads, \(S_c\) is number of context slices, and \(D_c\) is context dimension. If None, only self-attention is applied. Default is None.
+        x : tuple[torch.Tensor, ...]
+            Tuple of input tensors, each of shape :math:`(B, N, C)` where :math:`B`
+            is batch size, :math:`N` is number of tokens, and :math:`C` is number
+            of channels.
+        context : torch.Tensor | None, optional
+            Context tensor for cross-attention of shape :math:`(B, H, S_c, D_c)`
+            where :math:`H` is number of heads, :math:`S_c` is number of context
+            slices, and :math:`D_c` is context dimension. If ``None``, only
+            self-attention is applied. Default is ``None``.
 
         Returns
         -------
-        torch.Tensor
-            Output tensor of shape \((B, N, C)\), same shape as input.
+        list[torch.Tensor]
+            List of output tensors, each of shape :math:`(B, N, C)`, same shape
+            as inputs.
         """
-        # Project the inputs onto learned spaces:
+        ### Input validation
+        if not torch.compiler.is_compiling():
+            if len(x) == 0:
+                raise ValueError("Expected non-empty tuple of input tensors")
+            for i, tensor in enumerate(x):
+                if tensor.ndim != 3:
+                    raise ValueError(
+                        f"Expected 3D input tensor (B, N, C) at index {i}, "
+                        f"got {tensor.ndim}D tensor with shape {tuple(tensor.shape)}"
+                    )
+
+        # Project inputs onto learned latent spaces
         if self.plus:
-            x_mid = [ self.project_input_onto_slices(_x) for _x in x ]
-            # In transolver ++, fx_mid is gone.
-            # x_mid is used to compute the projections instead:
-            fx_mid = [ _x_mid for _x_mid in x_mid ]
+            x_mid = [self.project_input_onto_slices(_x) for _x in x]
+            # In Transolver++, x_mid is reused for both projections
+            fx_mid = [_x_mid for _x_mid in x_mid]
         else:
-            x_mid, fx_mid = zip(*[ self.project_input_onto_slices(_x) for _x in x ])
+            x_mid, fx_mid = zip(
+                *[self.project_input_onto_slices(_x) for _x in x]
+            )
 
-        # Perform the linear projection of learned latent space onto slices:
-        slice_projections = [ self.in_project_slice(_x_mid) for _x_mid in x_mid ]
+        # Project latent representations onto physical state slices
+        slice_projections = [self.in_project_slice(_x_mid) for _x_mid in x_mid]
 
-        # Slice projections has shape [B, N_head, N_tokens, Head_dim], but head_dim may have changed!
-        # Use the slice projections and learned spaces to compute the slices, and their weights:
-        slice_weights, slice_tokens = zip(*[self.compute_slices_from_projections(proj, _fx_mid) for proj, _fx_mid in zip(slice_projections, fx_mid)])
-        # slice_weights has shape [Batch, N_heads, N_tokens, Slice_num]
-        # slice_tokens has shape  [Batch, N_heads, N_tokens, head_dim]
-        # Apply attention to the slice tokens
+        # Compute slice weights and aggregated slice tokens
+        slice_weights, slice_tokens = zip(
+            *[
+                self.compute_slices_from_projections(proj, _fx_mid)
+                for proj, _fx_mid in zip(slice_projections, fx_mid)
+            ]
+        )
+
+        # Apply self-attention to slice tokens
         if self.use_te:
-            self_slice_token = [ self.compute_slice_attention_te(_slice_token) for _slice_token in slice_tokens ]
+            self_slice_token = [
+                self.compute_slice_attention_te(_slice_token)
+                for _slice_token in slice_tokens
+            ]
         else:
-            self_slice_token = [ self.compute_slice_attention_sdpa(_slice_token) for _slice_token in slice_tokens ]
-        
-        # HERE, we are differing: apply cross-attention with physical states:
+            self_slice_token = [
+                self.compute_slice_attention_sdpa(_slice_token)
+                for _slice_token in slice_tokens
+            ]
+
+        # Apply cross-attention with context if provided
         if context is not None:
-            # cross_slice_token = self.compute_slice_attention_cross(
-            #     slice_tokens, context
-            # )
-            cross_slice_token = [ self.compute_slice_attention_cross([_slice_token], context)[0] 
-                for _slice_token in slice_tokens 
+            cross_slice_token = [
+                self.compute_slice_attention_cross([_slice_token], context)[0]
+                for _slice_token in slice_tokens
             ]
-            
-            # Apply learnable mixing:
+
+            # Blend self-attention and cross-attention with learnable mixing weight
             mixing_weight = torch.sigmoid(self.state_mixing)
-            out_slice_token = [ mixing_weight * sst + (1 - mixing_weight) * cst
+            out_slice_token = [
+                mixing_weight * sst + (1 - mixing_weight) * cst
                 for sst, cst in zip(self_slice_token, cross_slice_token)
             ]
-
         else:
-            # Just keep self attention:
+            # Use only self-attention when no context is provided
             out_slice_token = self_slice_token
 
-        # Shape unchanged
-
-        # Deslice:
+        # Project attention outputs back to original space using slice weights
         outputs = [
-            self.project_attention_outputs(ost, sw) for ost, sw in zip(out_slice_token, slice_weights)
+            self.project_attention_outputs(ost, sw)
+            for ost, sw in zip(out_slice_token, slice_weights)
         ]
 
-        # Outputs now has the same shape as the original input x
-
         return outputs
 
 
@@ -233,26 +302,58 @@ class GALE_block(nn.Module):
     dropout : float
         Dropout rate.
     act : str, optional
-        Activation function name. Default is "gelu".
+        Activation function name. Default is ``"gelu"``.
     mlp_ratio : int, optional
         Ratio of MLP hidden dimension to ``hidden_dim``. Default is 4.
     last_layer : bool, optional
-        Whether this is the last layer in the model. Default is False.
+        Whether this is the last layer in the model. Default is ``False``.
     out_dim : int, optional
         Output dimension (only used if ``last_layer=True``). Default is 1.
     slice_num : int, optional
         Number of learned physical state slices. Default is 32.
     use_te : bool, optional
-        Whether to use Transformer Engine backend. Default is True.
+        Whether to use Transformer Engine backend. Default is ``True``.
     plus : bool, optional
-        Whether to use Transolver++ features. Default is False.
+        Whether to use Transolver++ features. Default is ``False``.
     context_dim : int, optional
         Dimension of the context vector for cross-attention. Default is 0.
 
+    Forward
+    -------
+    fx : tuple[torch.Tensor, ...]
+        Tuple of input tensors, each of shape :math:`(B, N, C)` where :math:`B` is
+        batch size, :math:`N` is number of tokens, and :math:`C` is hidden dimension.
+    global_context : torch.Tensor
+        Global context tensor for cross-attention of shape :math:`(B, H, S_c, D_c)`
+        where :math:`H` is number of heads, :math:`S_c` is number of context slices,
+        and :math:`D_c` is context dimension.
+
+    Outputs
+    -------
+    list[torch.Tensor]
+        List of output tensors, each of shape :math:`(B, N, C)`, same shape as inputs.
+
     Notes
     -----
     The block applies layer normalization before the attention operation and uses
     residual connections after both the attention and MLP layers.
+
+    See Also
+    --------
+    :class:`GALE` : The attention mechanism used in this block.
+    :class:`physicsnemo.experimental.models.geotransolver.GeoTransolver` : Main model using GALE_block.
+
+    Examples
+    --------
+    >>> import torch
+    >>> block = GALE_block(num_heads=8, hidden_dim=256, dropout=0.1, context_dim=32)
+    >>> fx = (torch.randn(2, 100, 256),)  # Single input tensor in tuple
+    >>> context = torch.randn(2, 8, 64, 32)  # Global context
+    >>> outputs = block(fx, context)
+    >>> len(outputs)
+    1
+    >>> outputs[0].shape
+    torch.Size([2, 100, 256])
     """
 
     def __init__(
@@ -260,28 +361,32 @@ def __init__(
         num_heads: int,
         hidden_dim: int,
         dropout: float,
-        act="gelu",
-        mlp_ratio=4,
-        last_layer=False,
-        out_dim=1,
-        slice_num=32,
-        use_te=True,
+        act: str = "gelu",
+        mlp_ratio: int = 4,
+        last_layer: bool = False,
+        out_dim: int = 1,
+        slice_num: int = 32,
+        use_te: bool = True,
         plus: bool = False,
         context_dim: int = 0,
-    ):
+    ) -> None:
         super().__init__()
 
         if use_te and not TE_AVAILABLE:
             raise ImportError(
-                "Transformer Engine is not installed. Please install it with: pip install transformer-engine>=0.1.0"
+                "Transformer Engine is not installed. "
+                "Please install it with: pip install transformer-engine>=0.1.0"
             )
 
         self.last_layer = last_layer
+
+        # Layer normalization before attention
         if use_te:
             self.ln_1 = te.LayerNorm(hidden_dim)
         else:
             self.ln_1 = nn.LayerNorm(hidden_dim)
 
+        # GALE attention layer
         self.Attn = GALE(
             hidden_dim,
             heads=num_heads,
@@ -293,6 +398,7 @@ def __init__(
             context_dim=context_dim,
         )
 
+        # Feed-forward network with layer normalization
         if use_te:
             self.ln_mlp1 = te.LayerNormMLP(
                 hidden_size=hidden_dim,
@@ -312,28 +418,50 @@ def __init__(
                 ),
             )
 
-    def forward(self, fx: tuple[torch.Tensor, ...], global_context: tuple[torch.Tensor, ...]) -> torch.Tensor:
+    def forward(
+        self,
+        fx: tuple[Float[torch.Tensor, "batch tokens hidden_dim"], ...],
+        global_context: Float[torch.Tensor, "batch heads context_slices context_dim"],
+    ) -> list[Float[torch.Tensor, "batch tokens hidden_dim"]]:
         r"""Forward pass of the GALE block.
 
         Parameters
         ----------
-        fx : torch.Tensor
-            Input tensor of shape \((B, N, C)\) where \(B\) is batch size, \(N\) is number of tokens, and \(C\) is hidden dimension.
+        fx : tuple[torch.Tensor, ...]
+            Tuple of input tensors, each of shape :math:`(B, N, C)` where :math:`B`
+            is batch size, :math:`N` is number of tokens, and :math:`C` is hidden
+            dimension.
         global_context : torch.Tensor
-            Global context tensor for cross-attention of shape \((B, H, S_c, D_c)\) where \(H\) is number of heads, \(S_c\) is number of context slices, and \(D_c\) is context dimension.
+            Global context tensor for cross-attention of shape :math:`(B, H, S_c, D_c)`
+            where :math:`H` is number of heads, :math:`S_c` is number of context slices,
+            and :math:`D_c` is context dimension.
 
         Returns
         -------
-        torch.Tensor
-            Output tensor of shape \((B, N, C)\), same shape as input.
+        list[torch.Tensor]
+            List of output tensors, each of shape :math:`(B, N, C)`, same shape as inputs.
         """
-        
-        normed_inputs = [ self.ln_1(_fx) for _fx in fx ]
-        attn = self.Attn(normed_inputs, global_context)
-        
-        fx = [ attn[i] + normed_inputs[i] for i in range(len(normed_inputs)) ]
-        
-        fx = [ self.ln_mlp1(_fx) + _fx for _fx in fx ]
-
-        return fx
-
+        ### Input validation
+        if not torch.compiler.is_compiling():
+            if len(fx) == 0:
+                raise ValueError("Expected non-empty tuple of input tensors")
+            for i, tensor in enumerate(fx):
+                if tensor.ndim != 3:
+                    raise ValueError(
+                        f"Expected 3D input tensor (B, N, C) at index {i}, "
+                        f"got {tensor.ndim}D tensor with shape {tuple(tensor.shape)}"
+                    )
+
+        # Apply pre-normalization to all inputs
+        normed_inputs = [self.ln_1(_fx) for _fx in fx]
+
+        # Apply GALE attention with cross-attention to global context
+        attn = self.Attn(tuple(normed_inputs), global_context)
+
+        # Residual connection after attention
+        fx_out = [attn[i] + normed_inputs[i] for i in range(len(normed_inputs))]
+
+        # Feed-forward network with residual connection
+        fx_out = [self.ln_mlp1(_fx) + _fx for _fx in fx_out]
+
+        return fx_out
\ No newline at end of file
diff --git a/physicsnemo/experimental/models/geotransolver/geotransolver.py b/physicsnemo/experimental/models/geotransolver/geotransolver.py
index 8330106984..fe58f8804e 100644
--- a/physicsnemo/experimental/models/geotransolver/geotransolver.py
+++ b/physicsnemo/experimental/models/geotransolver/geotransolver.py
@@ -14,20 +14,29 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from dataclasses import dataclass
+r"""GeoTransolver: Geometry-Aware Physics Attention Transformer.
+
+This module provides the GeoTransolver model, which extends the Transolver architecture
+with GALE (Geometry-Aware Latent Embeddings) attention for incorporating geometric
+structure and global context throughout the forward pass.
+"""
+
+from __future__ import annotations
+
 from collections.abc import Sequence
+from dataclasses import dataclass
 
 import torch
 import torch.nn as nn
+from jaxtyping import Float
 
 import physicsnemo  # noqa: F401 for docs
-from physicsnemo.core.version_check import check_version_spec
-from physicsnemo.models.transolver.transolver import MLP
-
 from physicsnemo.core.meta import ModelMetaData
 from physicsnemo.core.module import Module
+from physicsnemo.core.version_check import check_version_spec
+from physicsnemo.models.transolver.transolver import MLP
 
-from .context_projector import ContextProjector, GlobalContextBuilder
+from .context_projector import GlobalContextBuilder
 from .gale import GALE_block
 
 # Check optional dependency availability
@@ -36,11 +45,32 @@
     import transformer_engine.pytorch as te
 
 
-
 @dataclass
 class GeoTransolverMetaData(ModelMetaData):
-    """
-    Data class for storing essential meta data needed for the GeoTransolver model.
+    r"""Data class for storing essential meta data needed for the GeoTransolver model.
+
+    Attributes
+    ----------
+    name : str
+        Model name. Default is ``"GeoTransolver"``.
+    jit : bool
+        Whether JIT compilation is supported. Default is ``False``.
+    cuda_graphs : bool
+        Whether CUDA graphs are supported. Default is ``False``.
+    amp : bool
+        Whether automatic mixed precision is supported. Default is ``True``.
+    onnx_cpu : bool
+        Whether ONNX export to CPU is supported. Default is ``False``.
+    onnx_gpu : bool
+        Whether ONNX export to GPU is supported. Default is ``True``.
+    onnx_runtime : bool
+        Whether ONNX runtime is supported. Default is ``True``.
+    var_dim : int
+        Variable dimension for physics-informed features. Default is 1.
+    func_torch : bool
+        Whether torch functions are used. Default is ``False``.
+    auto_grad : bool
+        Whether automatic differentiation is used. Default is ``False``.
     """
 
     name: str = "GeoTransolver"
@@ -58,7 +88,24 @@ class GeoTransolverMetaData(ModelMetaData):
     auto_grad: bool = False
 
 
-def _normalize_dim(x):
+def _normalize_dim(x: int | Sequence[int]) -> tuple[int, ...]:
+    r"""Normalize dimension specification to tuple format.
+
+    Parameters
+    ----------
+    x : int | Sequence[int]
+        Dimension specification as scalar or sequence.
+
+    Returns
+    -------
+    tuple[int, ...]
+        Normalized dimension tuple.
+
+    Raises
+    ------
+    TypeError
+        If ``x`` is not an int or valid sequence.
+    """
     # Accept int as scalar
     if isinstance(x, int):
         return (x,)
@@ -68,36 +115,64 @@ def _normalize_dim(x):
     raise TypeError(f"Invalid dim specifier {x!r}")
 
 
-def _normalize_tensor(x):
-    # Accept int as scalar
+def _normalize_tensor(
+    x: torch.Tensor | Sequence[torch.Tensor],
+) -> tuple[torch.Tensor, ...]:
+    r"""Normalize tensor input to tuple format.
+
+    Parameters
+    ----------
+    x : torch.Tensor | Sequence[torch.Tensor]
+        Single tensor or sequence of tensors.
+
+    Returns
+    -------
+    tuple[torch.Tensor, ...]
+        Normalized tensor tuple.
+
+    Raises
+    ------
+    TypeError
+        If ``x`` is not a tensor or valid sequence.
+    """
+    # Accept single tensor
     if isinstance(x, torch.Tensor):
         return (x,)
     if isinstance(x, Sequence):
-        return x
+        return tuple(x)
     raise TypeError(f"Invalid tensor structure")
 
+
 class GeoTransolver(Module):
     r"""GeoTransolver: Geometry-Aware Physics Attention Transformer.
 
-    GeoTransolver is an adaptation of the Transolver architecture, replacing standard attention
-    with GALE (Geometry-Aware Latent Embeddings) attention. GALE combines physics-aware
-    self-attention on learned state slices with cross-attention to geometry and global
-    context embeddings.
+    GeoTransolver is an adaptation of the Transolver architecture, replacing standard
+    attention with GALE (Geometry-Aware Latent Embeddings) attention. GALE combines
+    physics-aware self-attention on learned state slices with cross-attention to
+    geometry and global context embeddings.
 
-    The model projects geometry and global features onto physical state spaces, which are
-    then used as context in all transformer blocks. This design enables the model to
-    incorporate geometric structure and global information throughout the forward pass.
+    The model projects geometry and global features onto physical state spaces, which
+    are then used as context in all transformer blocks. This design enables the model
+    to incorporate geometric structure and global information throughout the forward
+    pass.
 
     Parameters
     ----------
-    functional_dim : int
-        Dimension of the input values (local embeddings), not including global embeddings or geometry features. Input will be projected to ``n_hidden`` before processing.
-    out_dim : int
-        Dimension of the output of the model.
-    geometry_dim : int, optional
-        Pointwise dimension of the geometry input features. If provided, geometry features will be projected onto physical states and used as context in all GALE layers. Default is None.
-    global_dim : int, optional
-        Dimension of the global embedding features. If provided, global features will be projected onto physical states and used as context in all GALE layers. Default is None.
+    functional_dim : int | tuple[int, ...]
+        Dimension of the input values (local embeddings), not including global
+        embeddings or geometry features. Input will be projected to ``n_hidden``
+        before processing. Can be a single int or tuple for multiple input types.
+    out_dim : int | tuple[int, ...]
+        Dimension of the output of the model. Must have same length as
+        ``functional_dim`` if both are tuples.
+    geometry_dim : int | None, optional
+        Pointwise dimension of the geometry input features. If provided, geometry
+        features will be projected onto physical states and used as context in all
+        GALE layers. Default is ``None``.
+    global_dim : int | None, optional
+        Dimension of the global embedding features. If provided, global features
+        will be projected onto physical states and used as context in all GALE
+        layers. Default is ``None``.
     n_layers : int, optional
         Number of GALE layers in the model. Default is 4.
     n_hidden : int, optional
@@ -105,76 +180,96 @@ class GeoTransolver(Module):
     dropout : float, optional
         Dropout rate applied across the GALE layers. Default is 0.0.
     n_head : int, optional
-        Number of attention heads in each GALE layer. Must evenly divide ``n_hidden`` to yield an integer head dimension. Default is 8.
+        Number of attention heads in each GALE layer. Must evenly divide
+        ``n_hidden`` to yield an integer head dimension. Default is 8.
     act : str, optional
-        Activation function name. Default is "gelu".
+        Activation function name. Default is ``"gelu"``.
     mlp_ratio : int, optional
         Ratio of MLP hidden dimension to ``n_hidden``. Default is 4.
     slice_num : int, optional
-        Number of learned physical state slices in the GALE layers, representing the number of learned states each layer should project inputs onto. Default is 32.
+        Number of learned physical state slices in the GALE layers, representing
+        the number of learned states each layer should project inputs onto.
+        Default is 32.
     use_te : bool, optional
-        Whether to use Transformer Engine backend when available. Default is True.
+        Whether to use Transformer Engine backend when available. Default is ``True``.
     time_input : bool, optional
-        Whether to include time embeddings. Default is False.
+        Whether to include time embeddings. Default is ``False``.
     plus : bool, optional
-        Whether to use Transolver++ features in the GALE layers. Default is False.
+        Whether to use Transolver++ features in the GALE layers. Default is ``False``.
     include_local_features : bool, optional
-        Whether to include local features in the global context. Default is False.
+        Whether to include local features in the global context. Default is ``False``.
     radii : list[float], optional
-        Radii for the local features. Default is [0.05, 0.25].
+        Radii for the local features. Default is ``[0.05, 0.25]``.
     neighbors_in_radius : list[int], optional
-        Neighbors in radius for the local features. Default is [8, 32].
+        Neighbors in radius for the local features. Default is ``[8, 32]``.
     n_hidden_local : int, optional
-        Hidden dimension for the local features. Default is 512.
+        Hidden dimension for the local features. Default is 32.
+
+    Forward
+    -------
+    local_embedding : torch.Tensor | tuple[torch.Tensor, ...]
+        Local embedding of the input data of shape :math:`(B, N, C)` where :math:`B`
+        is batch size, :math:`N` is number of nodes/tokens, and :math:`C` is
+        ``functional_dim``. Can be a single tensor or tuple for multiple input types.
+    local_positions : torch.Tensor | tuple[torch.Tensor, ...] | None, optional
+        Local positions for each input, each of shape :math:`(B, N, 3)`. Required if
+        ``include_local_features=True``. Default is ``None``.
+    global_embedding : torch.Tensor | None, optional
+        Global embedding of the input data of shape :math:`(B, N_g, C_g)` where
+        :math:`N_g` is number of global tokens and :math:`C_g` is ``global_dim``.
+        If ``None``, global context is not used. Default is ``None``.
+    geometry : torch.Tensor | None, optional
+        Geometry features of the input data of shape :math:`(B, N, C_{geo})` where
+        :math:`C_{geo}` is ``geometry_dim``. If ``None``, geometry context is not
+        used. Default is ``None``.
+    time : torch.Tensor | None, optional
+        Time embedding (currently not implemented). Default is ``None``.
+
+    Outputs
+    -------
+    torch.Tensor | tuple[torch.Tensor, ...]
+        Output tensor of shape :math:`(B, N, C_{out})` where :math:`C_{out}` is
+        ``out_dim``. Returns a single tensor if input was a single tensor, or a
+        tuple if input was a tuple.
 
     Raises
     ------
     ValueError
         If ``n_hidden`` is not evenly divisible by ``n_head``.
-
-
-    Forward
-    ----------
-    local_embedding : torch.Tensor
-        Local embedding of the input data of shape \((B, N, C)\) where \(B\) is batch size, \(N\) is number of nodes/tokens, and \(C\) is ``functional_dim``. Output will have the same \((B, N)\) shape but with ``out_dim`` channels.
-    global_embedding : torch.Tensor, optional
-        Global embedding of the input data of shape \((B, N_g, C_g)\) where \(N_g\) is number of global tokens and \(C_g\) is ``global_dim``. If None, global context is not used. Default is None.
-    geometry : torch.Tensor, optional
-        Geometry features of the input data of shape \((B, N, C_{geo})\) where \(C_{geo}\) is ``geometry_dim``. If None, geometry context is not used. Default is None.
-    time : torch.Tensor, optional
-        Time embedding (currently not implemented). Default is None.
-
-    Returns
-    -------
-    torch.Tensor
-        Output tensor of shape \((B, N, C_{out})\) where \(C_{out}\) is ``out_dim``.
+    ValueError
+        If ``functional_dim`` and ``out_dim`` have different lengths when both
+        are tuples.
+    NotImplementedError
+        If ``time`` is provided (not yet implemented).
 
     Notes
     -----
-    GeoTransolver currently supports unstructured mesh input only. Enhancements for image-based
-    and voxel-based inputs may be available in the future.
+    GeoTransolver currently supports unstructured mesh input only. Enhancements for
+    image-based and voxel-based inputs may be available in the future.
 
     For more details on Transolver, see:
-    - https://arxiv.org/pdf/2402.02366
-    - https://arxiv.org/pdf/2502.02414
+
+    - `Transolver paper <https://arxiv.org/pdf/2402.02366>`_
+    - `Transolver++ paper <https://arxiv.org/pdf/2502.02414>`_
 
     See Also
     --------
-    :class:`GALE` : The attention mechanism used in GeoTransolver.
-    :class:`GALE_block` : Transformer block using GALE attention.
-    :class:`ContextProjector` : Projects context features onto physical states.
+    :class:`~physicsnemo.experimental.models.geotransolver.gale.GALE` : The attention mechanism used in GeoTransolver.
+    :class:`~physicsnemo.experimental.models.geotransolver.gale.GALE_block` : Transformer block using GALE attention.
+    :class:`~physicsnemo.experimental.models.geotransolver.context_projector.ContextProjector` : Projects context features onto physical states.
 
     Examples
     --------
     Basic usage with local embeddings only:
 
     >>> import torch
-    >>> import physicsnemo
-    >>> model = physicsnemo.models.GeoTransolver(
+    >>> from physicsnemo.experimental.models.geotransolver import GeoTransolver
+    >>> model = GeoTransolver(
     ...     functional_dim=64,
     ...     out_dim=3,
     ...     n_hidden=256,
-    ...     n_layers=4
+    ...     n_layers=4,
+    ...     use_te=False,
     ... )
     >>> local_emb = torch.randn(2, 1000, 64)  # (batch, nodes, features)
     >>> output = model(local_emb)
@@ -183,13 +278,14 @@ class GeoTransolver(Module):
 
     Usage with geometry and global context:
 
-    >>> model = physicsnemo.models.GeoTransolver(
+    >>> model = GeoTransolver(
     ...     functional_dim=64,
     ...     out_dim=3,
     ...     geometry_dim=3,
     ...     global_dim=16,
     ...     n_hidden=256,
-    ...     n_layers=4
+    ...     n_layers=4,
+    ...     use_te=False,
     ... )
     >>> local_emb = torch.randn(2, 1000, 64)
     >>> geometry = torch.randn(2, 1000, 3)  # (batch, nodes, spatial_dim)
@@ -216,22 +312,30 @@ def __init__(
         time_input: bool = False,
         plus: bool = False,
         include_local_features: bool = False,
-        radii: list[float] = [0.05, 0.25],
-        neighbors_in_radius: list[int] = [8, 32],
+        radii: list[float] | None = None,
+        neighbors_in_radius: list[int] | None = None,
         n_hidden_local: int = 32,
     ) -> None:
         super().__init__(meta=GeoTransolverMetaData())
         self.__name__ = "GeoTransolver"
 
-        self.include_local_features = include_local_features
+        # Set defaults for mutable arguments
+        if radii is None:
+            radii = [0.05, 0.25]
+        if neighbors_in_radius is None:
+            neighbors_in_radius = [8, 32]
 
+        self.include_local_features = include_local_features
         self.use_te = use_te
-        # Check that the hidden dimension and head dimensions are compatible:
+
+        # Validate head dimension compatibility
         if not n_hidden % n_head == 0:
             raise ValueError(
-                f"GeoTransolver requires n_hidden % n_head == 0, but instead got {n_hidden % n_head}"
+                f"GeoTransolver requires n_hidden % n_head == 0, "
+                f"but instead got {n_hidden % n_head}"
             )
 
+        # Normalize dimension specifications to tuples
         functional_dims = _normalize_dim(functional_dim)
         out_dims = _normalize_dim(out_dim)
 
@@ -255,15 +359,15 @@ def __init__(
             include_local_features=self.include_local_features,
         )
         context_dim = self.context_builder.get_context_dim()
-      
+
+        # Validate dimension tuple lengths match
         if len(functional_dims) != len(out_dims):
             raise ValueError(
-                f"functional_dim and out_dim must be the same length, but instead got {len(functional_dims)} and {len(out_dims)}"
+                f"functional_dim and out_dim must be the same length, "
+                f"but instead got {len(functional_dims)} and {len(out_dims)}"
             )
-      
-        # This MLP is the initial projection onto the hidden space.
-        # One per input "type"
-        
+
+        # Input projection MLPs - one per input type
         self.preprocess = nn.ModuleList(
             [
                 MLP(
@@ -281,101 +385,177 @@ def __init__(
 
         self.n_hidden = n_hidden
 
+        # Compute effective hidden dimension including local features
+        effective_hidden = (
+            n_hidden + n_hidden_local * len(self.radii)
+            if self.include_local_features
+            else n_hidden
+        )
+
+        # GALE transformer blocks
         self.blocks = nn.ModuleList(
             [
                 GALE_block(
                     num_heads=n_head,
-                    hidden_dim=n_hidden + n_hidden_local * len(self.radii) if self.include_local_features else n_hidden,
+                    hidden_dim=effective_hidden,
                     dropout=dropout,
                     act=act,
                     mlp_ratio=mlp_ratio,
                     slice_num=slice_num,
-                    last_layer=(_ == n_layers - 1),
+                    last_layer=(layer_idx == n_layers - 1),
                     use_te=use_te,
                     plus=plus,
                     context_dim=context_dim,
                 )
-                for _ in range(n_layers)
+                for layer_idx in range(n_layers)
             ]
         )
 
+        # Output projection layers - one per output type
         if use_te:
             self.ln_mlp_out = nn.ModuleList(
                 [
-                    te.LayerNormLinear(
-                        in_features=n_hidden + n_hidden_local * len(self.radii) if self.include_local_features else n_hidden, out_features=o
-                    ) for o in out_dims
-                 ]
+                    te.LayerNormLinear(in_features=effective_hidden, out_features=o)
+                    for o in out_dims
+                ]
             )
         else:
             self.ln_mlp_out = nn.ModuleList(
                 [
                     nn.Sequential(
-                        nn.LayerNorm(n_hidden + n_hidden_local * len(self.radii) if self.include_local_features else n_hidden),
-                        nn.Linear(n_hidden + n_hidden_local * len(self.radii) if self.include_local_features else n_hidden, o),
+                        nn.LayerNorm(effective_hidden),
+                        nn.Linear(effective_hidden, o),
                     )
                     for o in out_dims
                 ]
-                
             )
 
-
+        # Time embedding network (optional, not yet implemented)
         self.time_input = time_input
         if time_input:
             self.time_fc = nn.Sequential(
-                nn.Linear(n_hidden, n_hidden), nn.SiLU(), nn.Linear(n_hidden, n_hidden)
+                nn.Linear(n_hidden, n_hidden),
+                nn.SiLU(),
+                nn.Linear(n_hidden, n_hidden),
             )
 
     def forward(
         self,
-        local_embedding: torch.Tensor | tuple[torch.Tensor, ...],
-        local_positions: torch.Tensor | tuple[torch.Tensor, ...] | None = None,
-        global_embedding: torch.Tensor | None = None,
-        geometry: torch.Tensor | None = None,
+        local_embedding: (
+            Float[torch.Tensor, "batch tokens features"]
+            | tuple[Float[torch.Tensor, "batch tokens features"], ...]
+        ),
+        local_positions: (
+            Float[torch.Tensor, "batch tokens spatial_dim"]
+            | tuple[Float[torch.Tensor, "batch tokens spatial_dim"], ...]
+            | None
+        ) = None,
+        global_embedding: Float[torch.Tensor, "batch global_tokens global_dim"]
+        | None = None,
+        geometry: Float[torch.Tensor, "batch tokens geometry_dim"] | None = None,
         time: torch.Tensor | None = None,
-    ) -> torch.Tensor:
+    ) -> (
+        Float[torch.Tensor, "batch tokens out_dim"]
+        | tuple[Float[torch.Tensor, "batch tokens out_dim"], ...]
+    ):
         r"""Forward pass of the GeoTransolver model.
 
-        The model constructs global context embeddings from geometry and global features by
-        projecting them onto physical state spaces. These context embeddings are then used
-        in all GALE blocks via cross-attention, allowing geometric and global information to
-        guide the learned physical state dynamics.
-        
-
+        The model constructs global context embeddings from geometry and global features
+        by projecting them onto physical state spaces. These context embeddings are then
+        used in all GALE blocks via cross-attention, allowing geometric and global
+        information to guide the learned physical state dynamics.
+
+        Parameters
+        ----------
+        local_embedding : torch.Tensor | tuple[torch.Tensor, ...]
+            Local embedding of the input data of shape :math:`(B, N, C)` where
+            :math:`B` is batch size, :math:`N` is number of nodes/tokens, and
+            :math:`C` is ``functional_dim``.
+        local_positions : torch.Tensor | tuple[torch.Tensor, ...] | None, optional
+            Local positions for each input, each of shape :math:`(B, N, 3)`.
+            Required if ``include_local_features=True``. Default is ``None``.
+        global_embedding : torch.Tensor | None, optional
+            Global embedding of shape :math:`(B, N_g, C_g)`. Default is ``None``.
+        geometry : torch.Tensor | None, optional
+            Geometry features of shape :math:`(B, N, C_{geo})`. Default is ``None``.
+        time : torch.Tensor | None, optional
+            Time embedding (not yet implemented). Default is ``None``.
+
+        Returns
+        -------
+        torch.Tensor | tuple[torch.Tensor, ...]
+            Output tensor of shape :math:`(B, N, C_{out})`. Returns a single
+            tensor if the input was a single tensor, a tuple if it was a tuple.
+
+        Raises
+        ------
+        NotImplementedError
+            If ``time`` is provided.
+        ValueError
+            If input tensors have incorrect dimensions.
         """
-
+        # Track whether input was a single tensor for output format
         single_input = isinstance(local_embedding, torch.Tensor)
-        
+
+        # Time embedding not yet supported
         if time is not None:
-            raise NotImplementedError("Time input is not implemented yet."
-                                      "Error rather than silently ignoring it.")
+            raise NotImplementedError(
+                "Time input is not implemented yet. "
+                "Error rather than silently ignoring it."
+            )
 
+        # Normalize inputs to tuple format
         local_embedding = _normalize_tensor(local_embedding)
         if local_positions is not None:
             local_positions = _normalize_tensor(local_positions)
-        
-        # Build context and extract local features using the context builder
+
+        # Input validation (skipped while torch.compile is compiling)
+        if not torch.compiler.is_compiling():
+            if len(local_embedding) == 0:
+                raise ValueError("Expected non-empty local_embedding")
+            for i, tensor in enumerate(local_embedding):
+                if tensor.ndim != 3:
+                    raise ValueError(
+                        f"Expected 3D local_embedding tensor (B, N, C) at index {i}, "
+                        f"got {tensor.ndim}D tensor with shape {tuple(tensor.shape)}"
+                    )
+            if geometry is not None and geometry.ndim != 3:
+                raise ValueError(
+                    f"Expected 3D geometry tensor (B, N, C_geo), "
+                    f"got {geometry.ndim}D tensor with shape {tuple(geometry.shape)}"
+                )
+            if global_embedding is not None and global_embedding.ndim != 3:
+                raise ValueError(
+                    f"Expected 3D global_embedding tensor (B, N_g, C_g), "
+                    f"got {global_embedding.ndim}D tensor with shape {tuple(global_embedding.shape)}"
+                )
+
+        # Build context embeddings and extract local features
         embedding_states, local_embedding_bq = self.context_builder.build_context(
             local_embedding, local_positions, geometry, global_embedding
         )
 
-        # Project inputs to hidden dimension
+        # Project inputs to hidden dimension: (B, N, C) -> (B, N, n_hidden)
         x = [self.preprocess[i](le) for i, le in enumerate(local_embedding)]
 
         # Concatenate local features if enabled
         if self.include_local_features and local_embedding_bq is not None:
-            x = [torch.cat([x[i], local_embedding_bq[i]], dim=-1) for i in range(len(x))]
+            x = [
+                torch.cat([x[i], local_embedding_bq[i]], dim=-1)
+                for i in range(len(x))
+            ]
 
+        # Pass through GALE transformer blocks with context cross-attention
         for block in self.blocks:
-            x = block(x, embedding_states)
+            x = block(tuple(x), embedding_states)
 
-        # Now, pass the data through the model:
+        # Project to output dimensions: (B, N, effective_hidden) -> (B, N, out_dim)
         x = [self.ln_mlp_out[i](x[i]) for i in range(len(x))]
 
+        # Return same format as input (single tensor or tuple)
         if single_input:
-            # If only one input came in, use just that as output:
             x = x[0]
         else:
             x = tuple(x)
 
-        return x
+        return x
\ No newline at end of file

From b7b9fb2413465a436051f90cdadaddbb634ad90e Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Tue, 13 Jan 2026 18:06:54 +0000
Subject: [PATCH 32/32] Reduce CPU time for transolver test; include a dynamo
 reset fixture that automatically runs after each test

---
 test/conftest.py                          | 15 +++++++++++++++
 test/models/transolver/test_transolver.py | 13 +++++++++----
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/test/conftest.py b/test/conftest.py
index ec03bfc6c6..ab86abd44d 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -196,3 +196,18 @@ def seed_random_state():
         torch.cuda.manual_seed_all(SEED)
 
     yield
+
+
+@pytest.fixture(autouse=True, scope="function")
+def reset_dynamo_state():
+    """Reset torch._dynamo state after each test.
+
+    This ensures test isolation by cleaning up dynamo's compiled function cache
+    and resetting configuration options like error_on_recompile. Without this,
+    tests that set error_on_recompile=True can cause subsequent tests to fail
+    when they trigger recompilation with different tensor shapes.
+    """
+    yield
+    # Reset after test completes
+    torch._dynamo.reset()
+    torch._dynamo.config.error_on_recompile = False
diff --git a/test/models/transolver/test_transolver.py b/test/models/transolver/test_transolver.py
index be52d93b56..5758103d6f 100644
--- a/test/models/transolver/test_transolver.py
+++ b/test/models/transolver/test_transolver.py
@@ -133,10 +133,15 @@ def setup_model():
             use_te=False,
         ).to(device)
 
-        bsize = 4
-
-        embedding = torch.randn(bsize, 12345, 3).to(device)
-        functional_input = torch.randn(bsize, 12345, 2).to(device)
+        if device == "cuda:0":
+            bsize = 4
+            n_points = 12345
+        else:
+            bsize = 1
+            n_points = 123
+
+        embedding = torch.randn(bsize, n_points, 3).to(device)
+        functional_input = torch.randn(bsize, n_points, 2).to(device)
 
         return model, embedding, functional_input