From d37fe170b803f60237baf00e7fd124d4b9f1a2a0 Mon Sep 17 00:00:00 2001 From: Yan Wong Date: Wed, 24 Dec 2025 10:44:49 +0000 Subject: [PATCH] Clarify individual table sorting requirements before subset --- python/tskit/tables.py | 10 +++++++--- python/tskit/trees.py | 11 +++++++---- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/python/tskit/tables.py b/python/tskit/tables.py index d61f864593..3ce3e64c75 100644 --- a/python/tskit/tables.py +++ b/python/tskit/tables.py @@ -3816,9 +3816,13 @@ def canonicalise(self, remove_unreferenced=None): """ This puts the tables in *canonical* form, imposing a stricter order on the tables than :ref:`required <sec_valid_tree_sequence_requirements>` for - a valid tree sequence. In particular, the individual - and population tables are sorted by the first node that refers to each - (see :meth:`TreeSequence.subset`). Then, the remaining tables are sorted + a valid tree sequence. In particular, the population table is sorted by + the lowest ID of the nodes that refer to each population, and the + individual table is sorted so that it not only meets the standard + :ref:`sorting requirements` but is + also secondarily sorted by the lowest ID of the nodes + that refer to each individual (see :meth:`TreeSequence.subset`). + The remaining tables are sorted as in :meth:`.sort`, with the modification that mutations are sorted by site, then time (if known), then the mutation's node's time, then number of descendant mutations (ensuring that parent mutations occur before diff --git a/python/tskit/trees.py b/python/tskit/trees.py index 7775231dde..2cb9710780 100644 --- a/python/tskit/trees.py +++ b/python/tskit/trees.py @@ -7513,10 +7513,13 @@ def subset( the ancestry of these nodes - for that, see :meth:`.simplify`. 
This has the side effect that it may change the order of the nodes, - individuals, populations, and migrations in the tree sequence: the nodes - in the new tree sequence will be in the order provided in ``nodes``, and - both individuals and populations will be ordered by the earliest retained - node that refers to them. (However, ``reorder_populations`` may be set to + individuals, populations, and migrations in the tree sequence. Nodes + in the new tree sequence will be in the order provided in ``nodes``, + populations will be ordered by the lowest ID of the nodes that refer to + them, and individuals will be ordered not only so that + :attr:`~Individual.parents` come before children, but also + secondarily by the lowest ID of their referring + nodes. (However, ``reorder_populations`` may be set to False to keep the population table unchanged.) By default, the method removes all individuals and populations not