diff --git a/README.md b/README.md index 469fee13..7b72e845 100644 --- a/README.md +++ b/README.md @@ -116,7 +116,7 @@ The first is lossless compression via GZIP, the second is lossy compression. For the group membership files we only apply lossless compression. However, each property in the final SOAP catalogue has a lossy compression filter associated with it, which are set in `SOAP/property_table.py`. The script -`compression/compress_soap_catalogue.py` will apply both lossy and +`SOAP/compression/compress_soap_catalogue.py` will apply both lossy and lossless compression to SOAP catalogues. ### Documentation diff --git a/SOAP/catalogue_readers/read_hbtplus.py b/SOAP/catalogue_readers/read_hbtplus.py index d91c5849..4dff8d93 100644 --- a/SOAP/catalogue_readers/read_hbtplus.py +++ b/SOAP/catalogue_readers/read_hbtplus.py @@ -264,7 +264,7 @@ def read_hbtplus_catalogue( MassInMsunh = None VelInKmS = None sorted_file = None - (LengthInMpch, MassInMsunh, VelInKmS) = comm.bcast( + LengthInMpch, MassInMsunh, VelInKmS = comm.bcast( (LengthInMpch, MassInMsunh, VelInKmS) ) sorted_file = comm.bcast(sorted_file) diff --git a/compression/README.md b/SOAP/compression/README.md similarity index 100% rename from compression/README.md rename to SOAP/compression/README.md diff --git a/compression/compress_soap_catalogue.py b/SOAP/compression/compress_soap_catalogue.py similarity index 100% rename from compression/compress_soap_catalogue.py rename to SOAP/compression/compress_soap_catalogue.py diff --git a/compression/create_empty_SOAP_catalogue.py b/SOAP/compression/create_empty_SOAP_catalogue.py similarity index 100% rename from compression/create_empty_SOAP_catalogue.py rename to SOAP/compression/create_empty_SOAP_catalogue.py diff --git a/compression/extract_filters.py b/SOAP/compression/extract_filters.py similarity index 100% rename from compression/extract_filters.py rename to SOAP/compression/extract_filters.py diff --git a/compression/filters.yml 
b/SOAP/compression/filters.yml similarity index 100% rename from compression/filters.yml rename to SOAP/compression/filters.yml diff --git a/SOAP/compression/make_virtual_snapshot.py b/SOAP/compression/make_virtual_snapshot.py new file mode 100644 index 00000000..2311840e --- /dev/null +++ b/SOAP/compression/make_virtual_snapshot.py @@ -0,0 +1,362 @@ +#!/bin/env python + +import os.path +import h5py +import shutil +import numpy as np + + +class SafeDict(dict): + def __missing__(self, key): + # Return the key back in braces so it remains in the string + return "{" + key + "}" + + +def update_vds_paths(dset, modify_function): + """ + Modify the virtual paths of the specified dataset + + Note that querying the source dataspace and selection does not appear + to work (invalid pointer error from h5py) so here we assume that we're + referencing all of the source dataspace, which is correct for SWIFT + snapshots. + + dset: a h5py.Dataset object + modify_function: a function which takes the old path as its argument and + returns the new path + """ + + # Choose a temporary path for the new virtual dataset + path = dset.name + tmp_path = dset.name + ".__tmp__" + + # Build the creation property list for the new dataset + plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE) + for vs in dset.virtual_sources(): + bounds = vs.vspace.get_select_bounds() + if bounds is not None: + lower, upper = bounds + size = np.asarray(upper, dtype=int) - np.asarray(lower, dtype=int) + 1 + src_space = h5py.h5s.create_simple(tuple(size)) + new_name = modify_function(vs.file_name) + plist.set_virtual( + vs.vspace, new_name.encode(), vs.dset_name.encode(), src_space + ) + + # Create the new dataset + tmp_dset = h5py.h5d.create( + dset.file["/"].id, + tmp_path.encode(), + dset.id.get_type(), + dset.id.get_space(), + dcpl=plist, + ) + tmp_dset = h5py.Dataset(tmp_dset) + for attr_name in dset.attrs: + tmp_dset.attrs[attr_name] = dset.attrs[attr_name] + + # Rename the new dataset + f = dset.file + del 
f[path] + f[path] = f[tmp_path] + del f[tmp_path] + + +def make_virtual_snapshot( + snapshot, + auxiliary_snapshots, + output_file, + absolute_paths=False, + discard_duplicate_datasets=False, +): + """ + Given a snapshot and auxiliary files, create + a new virtual snapshot with all datasets combine. + + snapshot: Path to the snapshot file + auxiliary_snapshots: List of auxiliary file patterns + output_file: Path to the output virtual snapshot + absolute_paths: If True, use absolute paths; if False, use relative paths + """ + + # Copy the input virtual snapshot to the output + shutil.copyfile(snapshot, output_file) + + # Open the output file + outfile = h5py.File(output_file, "r+") + + # Calculate directories for path updates + abs_snapshot_dir = os.path.abspath(os.path.dirname(snapshot)) + abs_auxiliary_dirs = [ + os.path.abspath(os.path.dirname(aux.format(file_nr=0))) + for aux in auxiliary_snapshots + ] + abs_output_dir = os.path.abspath(os.path.dirname(output_file)) + + if absolute_paths: + snapshot_dir = abs_snapshot_dir + auxiliary_dirs = abs_auxiliary_dirs + else: + snapshot_dir = os.path.relpath(abs_snapshot_dir, abs_output_dir) + auxiliary_dirs = [ + os.path.relpath(aux_dir, abs_output_dir) for aux_dir in abs_auxiliary_dirs + ] + + # Create path replacement functions + def make_replace_path(target_dir): + def replace_path(old_path): + basename = os.path.basename(old_path) + return os.path.join(target_dir, basename) + + return replace_path + + replace_snapshot_path = make_replace_path(snapshot_dir) + auxiliary_path_replacers = [make_replace_path(d) for d in auxiliary_dirs] + + all_auxiliary_datasets = {} + + for aux_index, auxiliary in enumerate(auxiliary_snapshots): + + # Check which datasets exist in the auxiliary files + # and store their attributes and datatype + filename = auxiliary.format(file_nr=0) + dset_attrs = {} + dset_dtype = {} + with h5py.File(filename, "r") as infile: + for ptype in range(7): + if not f"PartType{ptype}" in infile: + continue + 
dset_attrs[f"PartType{ptype}"] = {} + dset_dtype[f"PartType{ptype}"] = {} + for dset in infile[f"PartType{ptype}"].keys(): + attrs = dict(infile[f"PartType{ptype}/{dset}"].attrs) + dtype = infile[f"PartType{ptype}/{dset}"].dtype + + # Some auxiliary files are missing these attributes + if not "Value stored as physical" in attrs: + print(f"Setting comoving attrs for PartType{ptype}/{dset}") + attrs["Value stored as physical"] = [1] + attrs["Property can be converted to comoving"] = [0] + + # Add a flag that these datasets are stored in the auxiliary files + attrs["auxiliary file"] = [1] + + # Store the values we need for later + dset_attrs[f"PartType{ptype}"][dset] = attrs + dset_dtype[f"PartType{ptype}"][dset] = dtype + + # Check we don't have this dataset in any of the other auxiliary files + dset_path = f"PartType{ptype}/{dset}" + if dset_path in all_auxiliary_datasets: + other_file = all_auxiliary_datasets[f"PartType{ptype}/{dset}"] + raise ValueError( + f"{dset_path} is in {auxiliary} and {other_file}" + ) + all_auxiliary_datasets[dset_path] = auxiliary + + # Copy over the named column values, handling the case where we have + # dataset names that already exist in the original snapshot + for dset in infile.get("SubgridScheme/NamedColumns", []): + outfile_named_cols = outfile["SubgridScheme/NamedColumns"] + if dset in outfile_named_cols: + if discard_duplicate_datasets: + del outfile_named_cols[dset] + else: + outfile.move( + f"SubgridScheme/NamedColumns/{dset}", + f"SubgridScheme/NamedColumns/{dset}_snap", + ) + outfile_named_cols.create_dataset( + dset, + data=infile[f"SubgridScheme/NamedColumns/{dset}"], + ) + + # Loop over input auxiliary files to get dataset shapes + file_nr = 0 + filenames = [] + shapes = [] + counts = [] + while True: + filename = auxiliary.format(file_nr=file_nr) + if os.path.exists(filename): + filenames.append(filename) + with h5py.File(filename, "r") as infile: + shape = {} + count = {} + for ptype in range(7): + if f"PartType{ptype}" 
not in dset_attrs: + continue + shape[f"PartType{ptype}"] = {} + # Get the shape for each dataset + for dset in dset_attrs[f"PartType{ptype}"]: + s = infile[f"PartType{ptype}/{dset}"].shape + shape[f"PartType{ptype}"][dset] = s + # Get the number of particles in this chunk file + count[f"PartType{ptype}"] = s[0] + shapes.append(shape) + counts.append(count) + else: + break + file_nr += 1 + if file_nr == 0: + raise IOError(f"Failed to find files matching: {auxiliary}") + + # Loop over particle types in the output + for ptype in range(7): + if f"PartType{ptype}" not in dset_attrs: + continue + + # Create virtual layout for new datasets + layouts = {} + nr_parts = sum([count[f"PartType{ptype}"] for count in counts]) + for dset in dset_attrs[f"PartType{ptype}"]: + full_shape = list(shapes[0][f"PartType{ptype}"][dset]) + full_shape[0] = nr_parts + full_shape = tuple(full_shape) + dtype = dset_dtype[f"PartType{ptype}"][dset] + layouts[dset] = h5py.VirtualLayout(shape=full_shape, dtype=dtype) + + # Loop over input files + offset = 0 + for filename, count, shape in zip(filenames, counts, shapes): + n_part = count[f"PartType{ptype}"] + for dset in dset_attrs[f"PartType{ptype}"]: + layouts[dset][offset : offset + n_part] = h5py.VirtualSource( + filename, + f"PartType{ptype}/{dset}", + shape=shape[f"PartType{ptype}"][dset], + ) + offset += n_part + + # Create the virtual datasets, renaming datasets if they + # already exist in the snapshot + for dset, attrs in dset_attrs[f"PartType{ptype}"].items(): + if f"PartType{ptype}/{dset}" in outfile: + if discard_duplicate_datasets: + del outfile[f"PartType{ptype}/{dset}"] + else: + outfile.move( + f"PartType{ptype}/{dset}", f"PartType{ptype}/{dset}_snap" + ) + outfile.create_virtual_dataset( + f"PartType{ptype}/{dset}", layouts[dset], fillvalue=-999 + ) + for k, v in attrs.items(): + outfile[f"PartType{ptype}/{dset}"].attrs[k] = v + + # Update paths for this newly created auxiliary dataset + update_vds_paths( + 
outfile[f"PartType{ptype}/{dset}"], + auxiliary_path_replacers[aux_index], + ) + + # Copy GroupNr_bound to HaloCatalogueIndex, since + # that is the name in SOAP + if dset == "GroupNr_bound": + outfile.create_virtual_dataset( + f"PartType{ptype}/HaloCatalogueIndex", + layouts["GroupNr_bound"], + fillvalue=-999, + ) + for k, v in outfile[f"PartType{ptype}/GroupNr_bound"].attrs.items(): + outfile[f"PartType{ptype}/HaloCatalogueIndex"].attrs[k] = v + + # Update paths for HaloCatalogueIndex too + update_vds_paths( + outfile[f"PartType{ptype}/HaloCatalogueIndex"], + auxiliary_path_replacers[aux_index], + ) + + # Update paths for all original snapshot datasets + for ptype in range(7): + ptype_name = f"PartType{ptype}" + if ptype_name in outfile: + for dset_name in list(outfile[ptype_name].keys()): + dset = outfile[f"{ptype_name}/{dset_name}"] + if dset.is_virtual: + # Check if this is an auxiliary dataset (skip those, already handled) + if dset.attrs.get("auxiliary file", [0])[0] != 1: + # This is an original snapshot dataset + update_vds_paths(dset, replace_snapshot_path) + + # Done + outfile.close() + + +if __name__ == "__main__": + + import argparse + + # For description of parameters run the following: $ python make_virtual_snapshot.py --help + parser = argparse.ArgumentParser( + description=( + "Link SWIFT snapshots with SWIFT auxiliary snapshots (snapshot-like" + "files with the same number of particles in the same order as the" + "snapshot, but with less metadata), such as the SOAP memberships." + ) + ) + parser.add_argument( + "--virtual-snapshot", + type=str, + required=True, + help="Name of the SWIFT virtual snapshot file, e.g. snapshot_{snap_nr:04}.hdf5", + ) + parser.add_argument( + "--auxiliary-snapshots", + type=str, + nargs="+", + required=True, + help="One of more format strings for auxiliary files, e.g. 
membership_{snap_nr:04}.{file_nr}.hdf5", + ) + parser.add_argument( + "--output-file", + type=str, + required=True, + help="Name of the virtual snapshot to create, e.g. membership_{snap_nr:04}.hdf5", + ) + parser.add_argument( + "--snap-nr", + type=int, + required=False, + default=-1, + help="Snapshot number (default: -1). Not required if snap_nr is present in filenames passed.", + ) + parser.add_argument( + "--absolute-paths", + action="store_true", + help="Use absolute paths in the virtual dataset", + ) + parser.add_argument( + "--discard-duplicate-datasets", + action="store_true", + help=( + "This flag determines the behaviour when a dataset exists in both the original snapshot" + "and the auxilary file. By default the virtual file will rename the original snapshot" + "dataset as {dataset_name}_snap. If this flag is passed then the dataset from the original" + "snapshot will not be linked to" + ), + ) + args = parser.parse_args() + + print(f"Creating virtual snapshot") + for k, v in vars(args).items(): + print(f" {k}: {v}") + + # Substitute snap number + virtual_snapshot = args.virtual_snapshot.format(snap_nr=args.snap_nr) + output_file = args.output_file.format(snap_nr=args.snap_nr) + + # We don't want to replace {file_nr} for auxiliary snapshots + auxiliary_snapshots = [ + filename.format_map(SafeDict({"snap_nr": args.snap_nr})) + for filename in args.auxiliary_snapshots + ] + + # Make a new virtual snapshot with group info + make_virtual_snapshot( + virtual_snapshot, + auxiliary_snapshots, + output_file, + absolute_paths=args.absolute_paths, + discard_duplicate_datasets=args.discard_duplicate_datasets, + ) diff --git a/compression/wrong_compression.yml b/SOAP/compression/wrong_compression.yml similarity index 100% rename from compression/wrong_compression.yml rename to SOAP/compression/wrong_compression.yml diff --git a/SOAP/compute_halo_properties.py b/SOAP/compute_halo_properties.py index d9a9be12..46e72258 100644 --- a/SOAP/compute_halo_properties.py +++ 
b/SOAP/compute_halo_properties.py @@ -40,7 +40,6 @@ from SOAP.particle_filter.cold_dense_gas_filter import ColdDenseGasFilter from SOAP.particle_filter.recently_heated_gas_filter import RecentlyHeatedGasFilter - # Set numpy to raise divide by zero, overflow and invalid operation errors as exceptions np.seterr(divide="raise", over="raise", invalid="raise") diff --git a/SOAP/core/combine_chunks.py b/SOAP/core/combine_chunks.py index 1f969570..99c12ab8 100644 --- a/SOAP/core/combine_chunks.py +++ b/SOAP/core/combine_chunks.py @@ -420,7 +420,7 @@ def combine_chunks( fof_reg = None fof_com_unit = None fof_mass_unit = None - (fof_reg, fof_com_unit, fof_mass_unit) = comm_world.bcast( + fof_reg, fof_com_unit, fof_mass_unit = comm_world.bcast( (fof_reg, fof_com_unit, fof_mass_unit) ) diff --git a/SOAP/core/halo_tasks.py b/SOAP/core/halo_tasks.py index 53f91598..0261771c 100644 --- a/SOAP/core/halo_tasks.py +++ b/SOAP/core/halo_tasks.py @@ -10,7 +10,6 @@ from SOAP.particle_selection.halo_properties import SearchRadiusTooSmallError from SOAP.property_table import PropertyTable - # Factor by which to increase search radius when looking for density threshold SEARCH_RADIUS_FACTOR = 1.2 diff --git a/SOAP/particle_filter/recently_heated_gas_filter.py b/SOAP/particle_filter/recently_heated_gas_filter.py index 8cdb7721..63d4467d 100644 --- a/SOAP/particle_filter/recently_heated_gas_filter.py +++ b/SOAP/particle_filter/recently_heated_gas_filter.py @@ -16,7 +16,6 @@ requires knowledge of the cosmology. 
""" - from astropy.cosmology import w0waCDM, z_at_value import astropy.constants as const import astropy.units as astropy_units diff --git a/SOAP/particle_selection/aperture_properties.py b/SOAP/particle_selection/aperture_properties.py index 5d0cbe4b..a14c9be1 100644 --- a/SOAP/particle_selection/aperture_properties.py +++ b/SOAP/particle_selection/aperture_properties.py @@ -760,6 +760,24 @@ def TotalSNIaRate(self) -> unyt.unyt_quantity: self.star_mask_ap ].sum() + @lazy_property + def ExSituFraction(self) -> unyt.unyt_quantity: + """ + Mass fraction of bound stars that formed in a different subhalo. + """ + if self.Nstar == 0: + return None + + group_nr = self.get_dataset("PartType4/GroupNr_bound")[self.star_mask_all][ + self.star_mask_ap + ] + birth_group_nr = self.get_dataset("PartType4/BirthHaloCatalogueIndex")[ + self.star_mask_all + ][self.star_mask_ap] + ex_situ = group_nr != birth_group_nr + + return self.star_mass_fraction[ex_situ].sum() + @lazy_property def bh_mask_all(self) -> NDArray[bool]: """ @@ -3788,6 +3806,7 @@ class ApertureProperties(HaloProperty): "stellar_age_mw": False, "stellar_age_lw": False, "TotalSNIaRate": False, + "ExSituFraction": False, "HydrogenMass": False, "HeliumMass": False, "MolecularHydrogenMass": False, diff --git a/SOAP/particle_selection/subhalo_properties.py b/SOAP/particle_selection/subhalo_properties.py index bd2c94cd..66b1fa85 100644 --- a/SOAP/particle_selection/subhalo_properties.py +++ b/SOAP/particle_selection/subhalo_properties.py @@ -481,6 +481,22 @@ def stellar_age_lw(self) -> unyt.unyt_array: Lrtot = Lr.sum() return ((Lr / Lrtot) * self.stellar_ages).sum() + @lazy_property + def ExSituFraction(self) -> unyt.unyt_quantity: + """ + Mass fraction of bound stars that formed in a different subhalo. 
+ """ + if self.Nstar == 0: + return None + + group_nr = self.get_dataset("PartType4/GroupNr_bound")[self.star_mask_all] + birth_group_nr = self.get_dataset("PartType4/BirthHaloCatalogueIndex")[ + self.star_mask_all + ] + ex_situ = group_nr != birth_group_nr + + return self.star_mass_fraction[ex_situ].sum() + @lazy_property def bh_mask_all(self) -> NDArray[bool]: """ @@ -2455,6 +2471,7 @@ class SubhaloProperties(HaloProperty): "Lstar_luminosity_weighted", "stellar_age_mw", "stellar_age_lw", + "ExSituFraction", "Mgas_SF", "gasmetalfrac_SF", "MedianStellarBirthDensity", diff --git a/SOAP/property_table.py b/SOAP/property_table.py index 583f291f..1867517a 100644 --- a/SOAP/property_table.py +++ b/SOAP/property_table.py @@ -1905,6 +1905,22 @@ class PropertyTable: output_physical=True, a_scale_exponent=0, ), + "ExSituFraction": Property( + name="ExSituFraction", + shape=1, + dtype=np.float32, + unit="dimensionless", + description="Mass fraction of bound stars that formed in a different subhalo", + lossy_compression_filter="FMantissa9", + dmo_property=True, + particle_properties=[ + "PartType4/Masses", + "PartType4/BirthHaloCatalogueIndex", + "PartType4/GroupNr_bound", + ], + output_physical=True, + a_scale_exponent=0, + ), "Mgas": Property( name="GasMass", shape=1, @@ -5134,7 +5150,7 @@ def generate_tex_files(self, output_dir: str): # standalone table file footer tailstr = "\\end{document}" - # generate the auxilary documentation files + # generate the auxiliary documentation files with open(f"{output_dir}/timestamp.tex", "w") as ofile: ofile.write(get_version_string()) with open(f"{output_dir}/table.tex", "w") as ofile: diff --git a/compression/make_virtual_snapshot.py b/compression/make_virtual_snapshot.py deleted file mode 100644 index 5f6e37f8..00000000 --- a/compression/make_virtual_snapshot.py +++ /dev/null @@ -1,192 +0,0 @@ -#!/bin/env python - -import os.path -import h5py -import shutil - - -def make_virtual_snapshot(snapshot, membership, output_file, snap_nr): 
- """ - Given a FLAMINGO snapshot and group membership files, - create a new virtual snapshot with group info. - """ - - # Check which datasets exist in the membership files - # and store their attributes and datatype - filename = membership.format(file_nr=0, snap_nr=snap_nr) - dset_attrs = {} - dset_dtype = {} - with h5py.File(filename, "r") as infile: - for ptype in range(7): - if not f"PartType{ptype}" in infile: - continue - dset_attrs[f"PartType{ptype}"] = {} - dset_dtype[f"PartType{ptype}"] = {} - for dset in infile[f"PartType{ptype}"].keys(): - attrs = dict(infile[f"PartType{ptype}/{dset}"].attrs) - dtype = infile[f"PartType{ptype}/{dset}"].dtype - - # Some membership files are missing these attributes - if not "Value stored as physical" in attrs: - print(f"Setting comoving attrs for PartType{ptype}/{dset}") - attrs["Value stored as physical"] = [1] - attrs["Property can be converted to comoving"] = [0] - - # Add a flag that these are stored in the membership files - attrs["Auxilary file"] = [1] - - # Store the values we need for later - dset_attrs[f"PartType{ptype}"][dset] = attrs - dset_dtype[f"PartType{ptype}"][dset] = dtype - - # Copy the input virtual snapshot to the output - shutil.copyfile(snapshot, output_file) - - # Open the output file - outfile = h5py.File(output_file, "r+") - - # Loop over input membership files to get dataset shapes - file_nr = 0 - filenames = [] - shapes = [] - counts = [] - while True: - filename = membership.format(file_nr=file_nr, snap_nr=snap_nr) - if os.path.exists(filename): - filenames.append(filename) - with h5py.File(filename, "r") as infile: - shape = {} - count = {} - for ptype in range(7): - if f"PartType{ptype}" not in dset_attrs: - continue - shape[f"PartType{ptype}"] = {} - # Get the shape for each dataset - for dset in dset_attrs[f"PartType{ptype}"]: - s = infile[f"PartType{ptype}/{dset}"].shape - shape[f"PartType{ptype}"][dset] = s - # Get the number of particles in this chunk file - count[f"PartType{ptype}"] = 
s[0] - shapes.append(shape) - counts.append(count) - else: - break - file_nr += 1 - if file_nr == 0: - raise IOError(f"Failed to find files matching: {membership}") - - # Loop over particle types in the output - for ptype in range(7): - if f"PartType{ptype}" not in dset_attrs: - continue - - # Create virtual layout for new datasets - layouts = {} - nr_parts = sum([count[f"PartType{ptype}"] for count in counts]) - for dset in dset_attrs[f"PartType{ptype}"]: - full_shape = list(shapes[0][f"PartType{ptype}"][dset]) - full_shape[0] = nr_parts - full_shape = tuple(full_shape) - dtype = dset_dtype[f"PartType{ptype}"][dset] - layouts[dset] = h5py.VirtualLayout(shape=full_shape, dtype=dtype) - - # Loop over input files - offset = 0 - for filename, count, shape in zip(filenames, counts, shapes): - n_part = count[f"PartType{ptype}"] - for dset in dset_attrs[f"PartType{ptype}"]: - layouts[dset][offset : offset + n_part] = h5py.VirtualSource( - filename, - f"PartType{ptype}/{dset}", - shape=shape[f"PartType{ptype}"][dset], - ) - offset += n_part - - # Create the virtual datasets, renaming datasets if they - # already exist in the snapshot - for dset, attrs in dset_attrs[f"PartType{ptype}"].items(): - if f"PartType{ptype}/{dset}" in outfile: - outfile.move(f"PartType{ptype}/{dset}", f"PartType{ptype}/{dset}_snap") - outfile.create_virtual_dataset( - f"PartType{ptype}/{dset}", layouts[dset], fillvalue=-999 - ) - for k, v in attrs.items(): - outfile[f"PartType{ptype}/{dset}"].attrs[k] = v - - # Copy GroupNr_bound to HaloCatalogueIndex, since that is the name in SOAP - if dset == "GroupNr_bound": - outfile.create_virtual_dataset( - f"PartType{ptype}/HaloCatalogueIndex", - layouts["GroupNr_bound"], - fillvalue=-999, - ) - for k, v in outfile[f"PartType{ptype}/GroupNr_bound"].attrs.items(): - outfile[f"PartType{ptype}/HaloCatalogueIndex"].attrs[k] = v - - # Done - outfile.close() - - -if __name__ == "__main__": - - import argparse - from update_vds_paths import 
update_virtual_snapshot_paths - - # For description of parameters run the following: $ python make_virtual_snapshot.py --help - parser = argparse.ArgumentParser( - description=( - "Link SWIFT snapshots with SWIFT auxilary snapshots (snapshot-like" - "files with the same number of particles in the same order as the" - "snapshot, but with less metadata), such as the SOAP memberships" - ) - ) - parser.add_argument( - "virtual_snapshot", - type=str, - help="Name of the SWIFT virtual snapshot file, e.g. snapshot_{snap_nr:04}.hdf5", - ) - parser.add_argument( - "membership", - type=str, - help="Format string for membership files, e.g. membership_{snap_nr:04}.{file_nr}.hdf5", - ) - parser.add_argument( - "output_file", - type=str, - help="Name of the virtual snapshot to create, e.g. membership_{snap_nr:04}.hdf5", - ) - parser.add_argument( - "snap_nr", - type=int, - nargs="?", - default=-1, - help="Snapshot number (default: -1). Not required if snap_nr is present in filenames passed.", - ) - parser.add_argument( - "--absolute-paths", - action="store_true", - help="Use absolute paths in the virtual dataset", - ) - args = parser.parse_args() - - # Substitute snap number - virtual_snapshot = args.virtual_snapshot.format(snap_nr=args.snap_nr) - output_file = args.output_file.format(snap_nr=args.snap_nr) - - # Make a new virtual snapshot with group info - make_virtual_snapshot(virtual_snapshot, args.membership, output_file, args.snap_nr) - - # Set file paths for datasets - abs_snapshot_dir = os.path.abspath(os.path.dirname(virtual_snapshot)) - abs_membership_dir = os.path.abspath( - os.path.dirname(args.membership.format(snap_nr=args.snap_nr, file_nr=0)) - ) - if args.absolute_paths: - # Ensure all paths in the virtual file are absolute to avoid VDS prefix issues - # (we probably need to pick up datasets from two different directories) - update_virtual_snapshot_paths(output_file, abs_snapshot_dir, abs_membership_dir) - else: - abs_output_dir = 
os.path.abspath(os.path.dirname(output_file)) - rel_snapshot_dir = os.path.relpath(abs_snapshot_dir, abs_output_dir) - rel_membership_dir = os.path.relpath(abs_membership_dir, abs_output_dir) - update_virtual_snapshot_paths(output_file, rel_snapshot_dir, rel_membership_dir) diff --git a/compression/update_vds_paths.py b/compression/update_vds_paths.py deleted file mode 100644 index da24d57d..00000000 --- a/compression/update_vds_paths.py +++ /dev/null @@ -1,118 +0,0 @@ -#!/bin/env python - -import sys -import h5py -import numpy as np -import os.path - - -def update_vds_paths(dset, modify_function): - """ - Modify the virtual paths of the specified dataset - - Note that querying the source dataspace and selection does not appear - to work (invalid pointer error from h5py) so here we assume that we're - referencing all of the source dataspace, which is correct for SWIFT - snapshots. - - dset: a h5py.Dataset object - modify_function: a function which takes the old path as its argument and - returns the new path - """ - - # Choose a temporary path for the new virtual dataset - path = dset.name - tmp_path = dset.name + ".__tmp__" - - # Build the creation property list for the new dataset - plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE) - for vs in dset.virtual_sources(): - bounds = vs.vspace.get_select_bounds() - if bounds is not None: - lower, upper = bounds - size = np.asarray(upper, dtype=int) - np.asarray(lower, dtype=int) + 1 - src_space = h5py.h5s.create_simple(tuple(size)) - new_name = modify_function(vs.file_name) - plist.set_virtual( - vs.vspace, new_name.encode(), vs.dset_name.encode(), src_space - ) - - # Create the new dataset - tmp_dset = h5py.h5d.create( - dset.file["/"].id, - tmp_path.encode(), - dset.id.get_type(), - dset.id.get_space(), - dcpl=plist, - ) - tmp_dset = h5py.Dataset(tmp_dset) - for attr_name in dset.attrs: - tmp_dset.attrs[attr_name] = dset.attrs[attr_name] - - # Rename the new dataset - f = dset.file - del f[path] - f[path] = 
f[tmp_path] - del f[tmp_path] - - -def update_virtual_snapshot_paths(filename, snapshot_dir=None, membership_dir=None): - """ - Add full paths to virtual datasets in the specified file - """ - f = h5py.File(filename, "r+") - - # Find all datasets in the file - all_datasets = [] - - def visit_datasets(name, obj): - if isinstance(obj, h5py.Dataset): - all_datasets.append(obj) - - f.visititems(visit_datasets) - - def replace_snapshot_path(old_path): - basename = os.path.basename(old_path) - return os.path.join(snapshot_dir, basename) - - def replace_membership_path(old_path): - basename = os.path.basename(old_path) - return os.path.join(membership_dir, basename) - - # Loop over datasets and update paths if necessary - for dset in all_datasets: - if dset.is_virtual: - name = dset.name.split("/")[-1] - # Check if the dataset comes from a membership file - if dset.attrs.get("Auxilary file", [0])[0] == 1: - if membership_dir is not None: - update_vds_paths(dset, replace_membership_path) - # Catch old datasets which didn't have the "Auxilary file" set - elif name in ( - "GroupNr_all", - "GroupNr_bound", - "Rank_bound", - "HaloCatalogueIndex", - "SpecificPotentialEnergies", - ): - if membership_dir is not None: - update_vds_paths(dset, replace_membership_path) - # Catch old case of FOF IDs from membership files - elif (name == "FOFGroupIDs") and ("PartType1/FOFGroupIDs_old" in f): - if membership_dir is not None: - update_vds_paths(dset, replace_membership_path) - # Data comes from the snapshot files - else: - if snapshot_dir is not None: - update_vds_paths(dset, replace_snapshot_path) - - f.close() - - -if __name__ == "__main__": - - filename = sys.argv[1] # Virtual snapshot file to update - snapshot_dir = sys.argv[2] # Directory with the real snapshot files - membership_dir = sys.argv[3] # Directory with the real membership files - - update_virtual_snapshot_paths(filename, snapshot_dir, membership_dir) diff --git a/format.sh b/format.sh index f532a46d..1c142b87 100755 --- 
a/format.sh +++ b/format.sh @@ -20,7 +20,7 @@ fi black="./black_formatting_env/bin/python3 -m black" # Make sure we don't try and format any virtual environments -files=$(echo {compression/*.py,misc/*.py,SOAP/*.py,SOAP/*/*.py,tests/*.py}) +files=$(echo {misc/*.py,SOAP/*.py,SOAP/*/*.py,tests/*.py}) # Run formatting if [[ "$1" == "--check" ]]; then diff --git a/misc/check_subhalo_ranking.py b/misc/check_subhalo_ranking.py index e10d5ad9..9b94af6f 100644 --- a/misc/check_subhalo_ranking.py +++ b/misc/check_subhalo_ranking.py @@ -3,7 +3,6 @@ import numpy as np import h5py - # Read VR IDs and positions filename = "/cosma8/data/dp004/flamingo/Runs/L1000N0900/HYDRO_FIDUCIAL_DATA/HYDRO_FIDUCIAL/VR/catalogue_0077/vr_catalogue_0077.properties.0" with h5py.File(filename, "r") as infile: diff --git a/misc/compute_BirthHaloCatalogueIndex.py b/misc/compute_BirthHaloCatalogueIndex.py new file mode 100644 index 00000000..8926db16 --- /dev/null +++ b/misc/compute_BirthHaloCatalogueIndex.py @@ -0,0 +1,285 @@ +#!/bin/env python + +""" +compute_BirthHaloCatalogueIndex.py + +This script produces an auxiliary snapshot which contains the subhalo +id each star was part of when it first formed. + +Usage: + + mpirun -- python -u misc/compute_BirthHaloCatalogueIndex.py \ + --snap-basename SNAP_BASENAME \ + --membership-basename MEMBERSHIP_BASENAME \ + --output-basename OUTPUT_FILENAME \ + --final-snap-nr FINAL_SNAP_NR + +Run "python misc/compute_BirthHaloCatalogueIndex.py -h" for a full description +of the arguments, and a list of optional arguments. 
+ +""" + +import argparse +import datetime +import os + +from mpi4py import MPI + +comm = MPI.COMM_WORLD +comm_rank = comm.Get_rank() +comm_size = comm.Get_size() + +import h5py +import numpy as np + +import virgo.mpi.parallel_sort as psort +import virgo.mpi.parallel_hdf5 as phdf5 +from virgo.mpi.gather_array import gather_array + + +def load_particle_data(snap_basename, membership_basename, load_gas, comm): + """ + Load the particle IDs and halo membership for the particle types + we will use to match. Removes unbound particles. + """ + + particle_data = {} + + # Load particle IDs + snap_filename = snap_basename + ".{file_nr}.hdf5" + file = phdf5.MultiFile( + snap_filename, file_nr_attr=("Header", "NumFilesPerSnapshot"), comm=comm + ) + particle_data["PartType4/ParticleIDs"] = file.read("PartType4/ParticleIDs") + if load_gas: + particle_data["PartType0/ParticleIDs"] = file.read("PartType0/ParticleIDs") + + # Membership files don't have a header, so create a list of filenames + n_file = len(file.filenames) + membership_filenames = [f"{membership_basename}.{i}.hdf5" for i in range(n_file)] + # Load membership information + file = phdf5.MultiFile( + membership_filenames, file_nr_attr=("Header", "NumFilesPerSnapshot"), comm=comm + ) + particle_data["PartType4/GroupNr_bound"] = file.read("PartType4/GroupNr_bound") + if load_gas: + particle_data["PartType0/GroupNr_bound"] = file.read("PartType0/GroupNr_bound") + + # Check the two files are partitioned the same way + assert ( + particle_data["PartType4/GroupNr_bound"].shape + == particle_data["PartType4/ParticleIDs"].shape + ) + if load_gas: + assert ( + particle_data["PartType0/GroupNr_bound"].shape + == particle_data["PartType0/ParticleIDs"].shape + ) + + return particle_data + + +# Units for the dimensionless fields we will be saving +unit_attrs = { + "Conversion factor to CGS (not including cosmological corrections)": [1.0], + "Conversion factor to physical CGS (including cosmological corrections)": [1.0], + "U_I 
exponent": [0.0], + "U_L exponent": [0.0], + "U_M exponent": [0.0], + "U_t exponent": [0.0], + "U_T exponent": [0.0], + "a-scale exponent": [0.0], + "h-scale exponent": [0.0], + "Property can be converted to comoving": [0], + "Value stored as physical": [1], +} + + +def mpi_print(string, comm_rank): + if comm_rank == 0: + print(string) + + +if __name__ == "__main__": + + start_time = datetime.datetime.now() + + parser = argparse.ArgumentParser( + description=("Script to calculate BirthHaloCatalogueIndex of star particles"), + ) + parser.add_argument( + "--snap-basename", + type=str, + required=True, + help=( + "The basename of the snapshot files (the snapshot " + "name without the .{file_nr}.hdf5 suffix. Use " + "{snap_nr:04d} instead of the snapshot number)" + ), + ) + parser.add_argument( + "--membership-basename", + type=str, + required=True, + help="The basename of the membership files", + ) + parser.add_argument( + "--output-basename", + type=str, + required=True, + help="The basename of the output files", + ) + parser.add_argument( + "--final-snap-nr", + type=int, + required=True, + help=("Snapshot at which to load the particles"), + ) + parser.add_argument( + "--calculate-PreBirthHaloCatalogueIndex", + action="store_true", + help=( + "Whether to calculate and output the subhalo halo catalogue " + "index of the gas particle that formed each star" + ), + ) + + args = parser.parse_args() + + # Log the arguments + for k, v in vars(args).items(): + mpi_print(f" {k}: {v}", comm_rank) + + final_snap_basename = args.snap_basename.format(snap_nr=args.final_snap_nr) + final_membership_basename = args.membership_basename.format( + snap_nr=args.final_snap_nr + ) + mpi_print("Loading stars from final snapshot", comm_rank) + particle_data = load_particle_data( + final_snap_basename, + final_membership_basename, + False, + comm, + ) + star_particle_ids = particle_data["PartType4/ParticleIDs"] + star_birth_ids = particle_data["PartType4/GroupNr_bound"] + star_birth_ids[:] = 
-99 + star_first_snapshot = np.copy(star_birth_ids) + + if args.calculate_PreBirthHaloCatalogueIndex: + particle_data["PartType0/ParticleIDs"] = np.ones(0) + particle_data["PartType0/GroupNr_bound"] = np.ones(0) + star_prebirth_ids = np.copy(star_birth_ids) + + for snap_nr in range(0, args.final_snap_nr + 1): + + mpi_print(f"Loading data from snapshot {snap_nr}", comm_rank) + if args.calculate_PreBirthHaloCatalogueIndex: + # We need to keep the gas IDs from snapshot N-1 + gas_particle_ids = particle_data["PartType0/ParticleIDs"] + gas_group_nr = particle_data["PartType0/GroupNr_bound"] + snap_basename = args.snap_basename.format(snap_nr=snap_nr) + membership_basename = args.membership_basename.format(snap_nr=snap_nr) + particle_data = load_particle_data( + snap_basename, + membership_basename, + args.calculate_PreBirthHaloCatalogueIndex, + comm, + ) + + mpi_print(f"Matching stars", comm_rank) + # It would be quicker to make use of the BirthScaleFactors + # instead of checking all stars + idx = psort.parallel_match( + star_particle_ids[star_birth_ids == -99], + particle_data["PartType4/ParticleIDs"], + comm=comm, + ) + + new_birth_ids = psort.fetch_elements( + particle_data["PartType4/GroupNr_bound"], + idx[idx != -1], + comm=comm, + ) + + has_new_birth_id = star_birth_ids == -99 + has_new_birth_id[has_new_birth_id] = idx != -1 + star_birth_ids[has_new_birth_id] = new_birth_ids + star_first_snapshot[has_new_birth_id] = snap_nr + + if args.calculate_PreBirthHaloCatalogueIndex: + mpi_print(f"Matching gas", comm_rank) + # Identify the gas progenitor of the newly formed stars + gas_idx = psort.parallel_match( + star_particle_ids[has_new_birth_id], + gas_particle_ids, + comm=comm, + ) + # The gas progenitor may not exist for all stars due + # to particle splitting. Note this information is + # recoverable if required by using the SplitTrees. 
+ new_prebirth_ids = -99 * np.ones_like(new_birth_ids) + new_prebirth_ids[gas_idx != -1] = psort.fetch_elements( + gas_group_nr, + gas_idx[gas_idx != -1], + comm=comm, + ) + star_prebirth_ids[has_new_birth_id] = new_prebirth_ids + + # Check we found a value for every star + assert np.sum(star_birth_ids == -99) == 0 + + # Set up what we want to output + output = { + "BirthHaloCatalogueIndex": star_birth_ids, + "FirstSnapshot": star_first_snapshot, + } + attrs = { + "BirthHaloCatalogueIndex": { + "Description": "The HaloCatalogueIndex of this particle at the first snapshot it appeared." + }, + "FirstSnapshot": { + "Description": "Index of the first simulation snapshot in which the star particle is present." + }, + } + attrs["BirthHaloCatalogueIndex"].update(unit_attrs) + attrs["FirstSnapshot"].update(unit_attrs) + if args.calculate_PreBirthHaloCatalogueIndex: + output["PreBirthHaloCatalogueIndex"] = star_prebirth_ids + attrs["PreBirthHaloCatalogueIndex"] = { + "Description": "The HaloCatalogueIndex of the gas progenitor at the snapshot before the star formed. -99 if no gas progenitor is found."
+ } + attrs["PreBirthHaloCatalogueIndex"].update(unit_attrs) + + # Check the output directory exists + output_filename = ( + args.output_basename.format(snap_nr=args.final_snap_nr) + ".{file_nr}.hdf5" + ) + if comm_rank == 0: + output_dir = os.path.dirname(output_filename) + os.makedirs(output_dir, exist_ok=True) + comm.barrier() + + # Write the output + mpi_print("Writing output", comm_rank) + snap_file = phdf5.MultiFile( + final_snap_basename + ".{file_nr}.hdf5", + file_nr_attr=("Header", "NumFilesPerSnapshot"), + comm=comm, + ) + elements_per_file = snap_file.get_elements_per_file( + "ParticleIDs", group="PartType4" + ) + snap_file.write( + output, + elements_per_file, + filenames=output_filename, + mode="w", + group="PartType4", + attrs=attrs, + ) + + # Finished + comm.barrier() + mpi_print(f"Runtime: {datetime.datetime.now() - start_time}", comm_rank) + mpi_print("Done!", comm_rank) diff --git a/parameter_files/COLIBRE_HYBRID.yml b/parameter_files/COLIBRE_HYBRID.yml index 0f9b3f6e..4f823849 100644 --- a/parameter_files/COLIBRE_HYBRID.yml +++ b/parameter_files/COLIBRE_HYBRID.yml @@ -244,6 +244,7 @@ ApertureProperties: StellarCylindricalVelocityDispersionLuminosityWeighted: false StellarCylindricalVelocityDispersionVerticalLuminosityWeighted: false StellarCylindricalVelocityDispersionDiscPlaneLuminosityWeighted: false + ExSituFraction: false TotalMass: true TotalSNIaRate: true GasMassInColdDenseDiffuseMetals: @@ -742,6 +743,7 @@ SubhaloProperties: StellarCylindricalVelocityDispersionLuminosityWeighted: false StellarCylindricalVelocityDispersionVerticalLuminosityWeighted: false StellarCylindricalVelocityDispersionDiscPlaneLuminosityWeighted: false + ExSituFraction: false aliases: PartType0/LastSNIIKineticFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent PartType0/LastSNIIThermalFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent diff --git a/parameter_files/COLIBRE_THERMAL.yml b/parameter_files/COLIBRE_THERMAL.yml index 403267a5..7e2eb418 100644 
--- a/parameter_files/COLIBRE_THERMAL.yml +++ b/parameter_files/COLIBRE_THERMAL.yml @@ -244,6 +244,7 @@ ApertureProperties: StellarCylindricalVelocityDispersionDiscPlaneLuminosityWeighted: false StellarRotationalVelocity: false StellarRotationalVelocityLuminosityWeighted: false + ExSituFraction: false TotalMass: true TotalSNIaRate: true GasMassInColdDenseDiffuseMetals: @@ -742,6 +743,7 @@ SubhaloProperties: StellarCylindricalVelocityDispersionLuminosityWeighted: false StellarCylindricalVelocityDispersionVerticalLuminosityWeighted: false StellarCylindricalVelocityDispersionDiscPlaneLuminosityWeighted: false + ExSituFraction: false aliases: PartType0/LastSNIIKineticFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent PartType0/LastSNIIThermalFeedbackDensities: PartType0/DensitiesAtLastSupernovaEvent diff --git a/scripts/COLIBRE/compress_group_membership.sh b/scripts/COLIBRE/compress_group_membership.sh index ee34965f..f04ec4c8 100644 --- a/scripts/COLIBRE/compress_group_membership.sh +++ b/scripts/COLIBRE/compress_group_membership.sh @@ -90,7 +90,10 @@ echo "Creating virtual snapshot" snapshot="${output_dir}/${sim}/snapshots/colibre_${snapnum}/colibre_${snapnum}.hdf5" membership="${output_filename}.{file_nr}.hdf5" virtual="${outbase}/colibre_with_SOAP_membership_${snapnum}.hdf5" -python compression/make_virtual_snapshot.py $snapshot $membership $virtual +python SOAP/compression/make_virtual_snapshot.py \ + --virtual-snapshot $snapshot \ + --auxiliary-snapshots $membership \ + --output-file $virtual echo "Setting virtual file to be read-only" chmod a=r "${virtual}" diff --git a/scripts/COLIBRE/compress_halo_properties.sh b/scripts/COLIBRE/compress_halo_properties.sh index b75a7632..5acbca36 100755 --- a/scripts/COLIBRE/compress_halo_properties.sh +++ b/scripts/COLIBRE/compress_halo_properties.sh @@ -29,7 +29,7 @@ output_dir="/cosma8/data/dp004/dc-mcgi1/COLIBRE/Runs" scratch_dir="/snap8/scratch/dp004/dc-mcgi1/COLIBRE/Runs" # compression script 
-script="./compression/compress_soap_catalogue.py" +script="./SOAP/compression/compress_soap_catalogue.py" # Which snapshot to do snapnum=`printf '%04d' ${SLURM_ARRAY_TASK_ID}` @@ -41,7 +41,7 @@ sim="${SLURM_JOB_NAME}" input_filename="${input_dir}/${sim}/SOAP_uncompressed/halo_properties_${snapnum}.hdf5" # Location and name of the output SOAP catalogue -outbase="${output_dir}/${sim}/SOAP" +outbase="${output_dir}/${sim}/SOAP-ExSitu" mkdir -p $outbase output_filename="${outbase}/halo_properties_${snapnum}.hdf5" diff --git a/scripts/COLIBRE/compute_birth_index.sh b/scripts/COLIBRE/compute_birth_index.sh new file mode 100755 index 00000000..35181cbb --- /dev/null +++ b/scripts/COLIBRE/compute_birth_index.sh @@ -0,0 +1,47 @@ +#!/bin/bash -l + +#SBATCH --cpus-per-task=1 +#SBATCH -o ./logs/birth_track_id_%j.out +#SBATCH -p cosma8 +#SBATCH -A dp004 +#SBATCH -J BirthHaloCatalogueIndex +#SBATCH --nodes=4 +#SBATCH -t 24:00:00 +# N0752: 1 node, 2 hours +# N1504: 1 node, 12 hours +# N3008: 4 nodes, 24 hours + +set -e + +# TODO: Set these values +base_dir="/cosma8/data/dp004/dc-mcgi1/COLIBRE/BirthHaloCatalogueIndex" +output_dir="/cosma8/data/dp004/dc-mcgi1/COLIBRE/BirthHaloCatalogueIndex" +sim="L0400N3008/Thermal" +snapnum="0127" + +snap_basename="${base_dir}/${sim}/snapshots/colibre_{snap_nr:04d}/colibre_{snap_nr:04d}" +membership_basename="${base_dir}/${sim}/SOAP-HBT/membership_{snap_nr:04d}/membership_{snap_nr:04d}" +output_basename="${output_dir}/${sim}/SOAP-ExSitu/birth_${snapnum}/birth_${snapnum}" + +mpirun -- python misc/compute_BirthHaloCatalogueIndex.py \ + --snap-basename ${snap_basename} \ + --membership-basename ${membership_basename} \ + --output-basename ${output_basename} \ + --final-snap-nr ${snapnum} \ + --calculate-PreBirthHaloCatalogueIndex + +chmod a=r "${output_basename}"* + +snapshot="${snap_basename}.hdf5" +membership="${membership_basename}.{file_nr}.hdf5" +output="${output_basename}.{file_nr}.hdf5" 
+virtual="${output_dir}/${sim}/SOAP-ExSitu/birth_${snapnum}.hdf5" +python SOAP/compression/make_virtual_snapshot.py \ + --virtual-snapshot "$snapshot" \ + --auxiliary-snapshots "$membership" "$output" \ + --output-file "$virtual" \ + --snap-nr "$snapnum" \ + +chmod a=r "${virtual}" + +echo "Job complete!" diff --git a/scripts/EAGLE.sh b/scripts/EAGLE.sh index 8f87ef72..9c83e6f4 100755 --- a/scripts/EAGLE.sh +++ b/scripts/EAGLE.sh @@ -77,10 +77,10 @@ cd "${output_dir}/swift_snapshots/swift_${snap_nr}" python "${soap_dir}/create_virtual_snapshot.py" "snap_${snap_nr}.0.hdf5" cd - -python compression/make_virtual_snapshot.py \ - "${output_dir}/swift_snapshots/swift_${snap_nr}/snap_${snap_nr}.hdf5" \ - "${output_dir}/SOAP_uncompressed/membership_${snap_nr}/membership_${snap_nr}.{file_nr}.hdf5" \ - "${output_dir}/SOAP_uncompressed/snap_${snap_nr}.hdf5" \ +python SOAP/compression/make_virtual_snapshot.py \ + --virtual-snapshot "${output_dir}/swift_snapshots/swift_${snap_nr}/snap_${snap_nr}.hdf5" \ + --auxiliary-snapshots "${output_dir}/SOAP_uncompressed/membership_${snap_nr}/membership_${snap_nr}.{file_nr}.hdf5" \ + --output-file "${output_dir}/SOAP_uncompressed/snap_${snap_nr}.hdf5" ######### Run SOAP diff --git a/scripts/FLAMINGO/L1000N1800/compress_halo_properties_L1000N1800.sh b/scripts/FLAMINGO/L1000N1800/compress_halo_properties_L1000N1800.sh index 60d202d0..ea032de7 100644 --- a/scripts/FLAMINGO/L1000N1800/compress_halo_properties_L1000N1800.sh +++ b/scripts/FLAMINGO/L1000N1800/compress_halo_properties_L1000N1800.sh @@ -29,7 +29,7 @@ output_dir="/cosma8/data/dp004/dc-mcgi1/FLAMINGO/Runs" scratch_dir="/snap8/scratch/dp004/dc-mcgi1/FLAMINGO/Runs" # compression script -script="./compression/compress_soap_catalogue.py" +script="./SOAP/compression/compress_soap_catalogue.py" # Which snapshot to do snapnum=`printf '%04d' ${SLURM_ARRAY_TASK_ID}` diff --git a/tests/COLIBRE/run_L0025N0188_Thermal.sh b/tests/COLIBRE/run_L0025N0188_Thermal.sh index 27a43e1e..58969b78 100755 
--- a/tests/COLIBRE/run_L0025N0188_Thermal.sh +++ b/tests/COLIBRE/run_L0025N0188_Thermal.sh @@ -30,7 +30,7 @@ python tests/COLIBRE/create_parameters_file.py rm -r output/SOAP-tmp # Run SOAP on eight cores processing the selected halos. Use 'python3 -m pdb' to start in the debugger. -mpirun -np 8 python3 -u -m mpi4py SOAP/compute_halo_properties.py \ +mpirun -np 8 python SOAP/compute_halo_properties.py \ ./tests/COLIBRE/test_parameters.yml \ --halo-indices ${halo_indices} \ --sim-name=${sim} --snap-nr=${snapnum} --chunks=1 diff --git a/tests/test_SO_properties.py b/tests/test_SO_properties.py index 1ba8c03c..bd4459a4 100644 --- a/tests/test_SO_properties.py +++ b/tests/test_SO_properties.py @@ -419,7 +419,7 @@ def calculate_SO_properties_nfw_halo(seed, num_part, c): "crit", ) - (input_halo, data, rmax, Mtot, Npart, particle_numbers) = dummy_halos.gen_nfw_halo( + input_halo, data, rmax, Mtot, Npart, particle_numbers = dummy_halos.gen_nfw_halo( 100, c, num_part )