diff --git a/examples/additive_manufacturing/sintering_physics/inference.py b/examples/additive_manufacturing/sintering_physics/inference.py
index 2a870aa631..fbb5a3df48 100644
--- a/examples/additive_manufacturing/sintering_physics/inference.py
+++ b/examples/additive_manufacturing/sintering_physics/inference.py
@@ -32,15 +32,6 @@
         "Mesh Graph Net Datapipe requires the Tensorflow library. Install the "
         + "package at: https://www.tensorflow.org/install"
     )
-physical_devices = tf.config.list_physical_devices("GPU")
-
-try:
-    for device_ in physical_devices:
-        tf.config.experimental.set_memory_growth(device_, True)
-except:
-    # Invalid device or cannot modify virtual devices once initialized.
-    pass
-
 import hydra
 import torch
 from graph_dataset import GraphDataset
diff --git a/examples/additive_manufacturing/sintering_physics/requirements.txt b/examples/additive_manufacturing/sintering_physics/requirements.txt
index 667a0024d5..58c48332e6 100644
--- a/examples/additive_manufacturing/sintering_physics/requirements.txt
+++ b/examples/additive_manufacturing/sintering_physics/requirements.txt
@@ -1,3 +1,6 @@
 # pyvista is optional, required if need to run data preprocessing from raw simulation
 # pyvista==0.32.1
-tensorflow>=2.15,<3.0 # generate tfrecord
+# CPU-only TF avoids the bundled CUDA 12 runtime that conflicts with the
+# PhysicsNeMo container's CUDA 13 (PyTorch raises cudaErrorStubLibrary 302
+# and the process aborts). Only used here as a TFRecord parser.
+tensorflow-cpu>=2.15,<3.0
diff --git a/examples/additive_manufacturing/sintering_physics/train.py b/examples/additive_manufacturing/sintering_physics/train.py
index 4062337f6e..dded633280 100644
--- a/examples/additive_manufacturing/sintering_physics/train.py
+++ b/examples/additive_manufacturing/sintering_physics/train.py
@@ -58,15 +58,6 @@
 )
 from physicsnemo.models.vfgn.graph_network_modules import VFGNLearnedSimulator
 
-physical_devices = tf.config.list_physical_devices("GPU")
-try:
-    for device_ in physical_devices:
-        tf.config.experimental.set_memory_growth(device_, True)
-except:
-    # Invalid device or cannot modify virtual devices once initialized.
-    pass
-
-
 def Train(rank_zero_logger, dist, cfg: DictConfig):
     """
     Trains a graph-based model, evaluating and saving its performance periodically.
diff --git a/examples/cfd/external_aerodynamics/xaeronet/README.md b/examples/cfd/external_aerodynamics/xaeronet/README.md
index 9f3dd5d54c..30f67e345b 100644
--- a/examples/cfd/external_aerodynamics/xaeronet/README.md
+++ b/examples/cfd/external_aerodynamics/xaeronet/README.md
@@ -75,13 +75,29 @@ dataset, please refer to their [paper](https://arxiv.org/pdf/2408.11969).
 
 ## XAeroNet-S prerequisites
 
-Install the requirements using:
+Install the base requirements:
 
 ```bash
 pip install -r requirements.txt
-pip install pyg-lib -f https://data.pyg.org/whl/torch-2.8.0+cu129.html
 ```
 
+`pyg-lib` and `torch_scatter` ship as compiled CUDA extensions and must be
+installed from PyG's pre-built wheel index that matches your installed
+`torch` and CUDA versions. The snippet below detects both and constructs
+the correct URL:
+
+```bash
+TORCH=$(python -c "import torch; print(torch.__version__.split('+')[0])")
+CUDA=$(python -c "import torch; v=torch.version.cuda; print('cu' + v.replace('.', '') if v else 'cpu')")
+pip install pyg-lib torch_scatter -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html
+```
+
+If PyG has not published a wheel for your exact torch+CUDA combination
+yet, browse <https://data.pyg.org/whl/> to find the closest match, or
+build from source with `pip install --no-build-isolation torch_scatter`
+(plain `pip install torch_scatter` fails because pip's build isolation
+hides the installed `torch` from the build environment).
+
 See `pyg-lib` [installation instructions](https://github.com/pyg-team/pyg-lib?tab=readme-ov-file#installation)
 for more details.
 
diff --git a/examples/cfd/external_aerodynamics/xaeronet/requirements.txt b/examples/cfd/external_aerodynamics/xaeronet/requirements.txt
index 77b1dfda20..3dd86d60c1 100644
--- a/examples/cfd/external_aerodynamics/xaeronet/requirements.txt
+++ b/examples/cfd/external_aerodynamics/xaeronet/requirements.txt
@@ -1,6 +1,8 @@
 trimesh>=4.5.0
 torch_geometric>=2.6.1
-torch_scatter>=2.1.2
 pyvista
 vtk
 wandb
+scikit-learn
+tabulate
+matplotlib
diff --git a/physicsnemo/datapipes/gnn/ahmed_body_dataset.py b/physicsnemo/datapipes/gnn/ahmed_body_dataset.py
index f30debda1c..6cbfe17c26 100644
--- a/physicsnemo/datapipes/gnn/ahmed_body_dataset.py
+++ b/physicsnemo/datapipes/gnn/ahmed_body_dataset.py
@@ -47,6 +47,14 @@
 logger = logging.getLogger(__name__)
 
 
+def _init_pool_worker():
+    # Pool-worker initializer. Use file-system-backed shared memory for tensors
+    # returned from this pool: the default file-descriptor strategy passes one FD
+    # per tensor via SCM_RIGHTS and, once RLIMIT_NOFILE is exhausted (e.g. Ubuntu's
+    # default 1024), trips "RuntimeError: received 0 items of ancdata".
+    torch.multiprocessing.set_sharing_strategy("file_system")
+
+
 @dataclass
 class FileInfo:
     """VTP file info storage."""
@@ -204,6 +212,7 @@ def get_num_workers():
         with cf.ProcessPoolExecutor(
             max_workers=num_workers,
             mp_context=torch.multiprocessing.get_context("spawn"),
+            initializer=_init_pool_worker,
         ) as executor:
             for i, graph, coeff, normal, area in executor.map(
                 self.create_graph,