@@ -589,6 +589,157 @@ def simplify(adata: AnnData, n_nodes: int = 10, copy: bool = False):
589589 return adata if copy else None
590590
591591
def merge_small_segments(adata: AnnData, copy: bool = False):
    """\
    Merge segments that have no cells assigned into their neighboring nodes.

    Small segments (typically between two high-degree nodes like forks) can
    end up with no cells assigned during pseudotime calculation. For every
    such segment, the intermediate nodes and the `to` milestone are collapsed
    into the `from` milestone: their soft-assignment columns are added to the
    surviving node and the surviving neighbors of the removed nodes are
    re-attached to it. When several empty segments are chained (the `from`
    milestone of one is removed by another), everything is collapsed into the
    first milestone of the chain that is kept.

    This function should be called after pseudotime calculation if warnings
    about segments having no cells assigned are encountered.

    Parameters
    ----------
    adata
        Annotated data matrix with computed pseudotime.
    copy
        Return a copy instead of writing to adata.

    Returns
    -------
    adata : anndata.AnnData
        if `copy=True` it returns or else updates fields in `adata`:

        `.uns['graph']['B']`
            updated adjacency matrix with merged nodes.
        `.uns['graph']['F']`
            updated coordinates with merged nodes.
        `.uns['graph']['tips']`, `.uns['graph']['forks']`
            degree-1 and degree>2 nodes of the merged graph.
        `.uns['graph']['milestones']`
            milestones re-indexed to the new node numbering; milestones
            sitting on removed nodes are dropped.
        `.uns['graph']['root']`
            root re-indexed to the new node numbering.
        `.obsm['X_R']`
            updated, row-normalized soft assignment matrix.
        `.uns['graph']['pp_seg']`, `.uns['graph']['pp_info']`
            not written here directly; expected to be refreshed by the final
            `root`/`pseudotime` calls (NOTE(review): confirm).

    Raises
    ------
    ValueError
        If `adata.obs` has no `t` column (pseudotime was not computed), or if
        the merge relations between empty segments form a cycle.
    """

    logg.info("merging small segments without cells", reset=True)

    adata = adata.copy() if copy else adata

    if "t" not in adata.obs:
        raise ValueError(
            "You need to run `tl.pseudotime` before merging small segments."
        )

    graph = adata.uns["graph"]
    pp_seg = graph["pp_seg"].copy()
    pp_info = graph["pp_info"].copy()
    B = graph["B"].copy()
    F = graph["F"].copy()
    R = adata.obsm["X_R"].copy()

    # Identify segments with no cells assigned. value_counts() on a
    # categorical column also lists unobserved categories (with count 0), so
    # only segments with a strictly positive count are considered occupied.
    seg_counts = adata.obs.seg.value_counts()
    occupied_segs = seg_counts[seg_counts > 0].index
    empty_segs = [s for s in pp_seg.n.values if s not in occupied_segs]

    if len(empty_segs) == 0:
        logg.info(" no empty segments found", time=False)
        return adata if copy else None

    logg.info(f" found {len(empty_segs)} empty segment(s): {empty_segs} ", time=False)

    # For every node that will disappear, remember the node it collapses
    # into. The `from` milestone is earlier in pseudotime, so it is the one
    # kept; intermediate nodes and the `to` milestone are merged into it.
    merge_target = {}
    for seg_id in empty_segs:
        seg_row = pp_seg.loc[pp_seg.n == seg_id].iloc[0]
        from_node = int(seg_row["from"])
        to_node = int(seg_row["to"])

        for node in pp_info.index[pp_info.seg == seg_id].tolist():
            if node != from_node and node != to_node:
                merge_target[node] = from_node
        merge_target[to_node] = from_node

    if not merge_target:
        logg.info(" no nodes to merge", time=False)
        return adata if copy else None

    # Contract every removed node onto its surviving representative. The
    # representative is resolved through the whole chain first, so nothing is
    # ever accumulated on a node that is itself about to be removed.
    for node in merge_target:
        keeper = _resolve_merge_target(node, merge_target)

        # Combine soft assignments: add the removed column to the keeper.
        if node < R.shape[1]:
            R[:, keeper] = R[:, keeper] + R[:, node]

        # Re-attach the surviving neighbors of the removed node to the keeper.
        if node < B.shape[0]:
            for neighbor in np.where(B[node, :] > 0)[0]:
                if neighbor != keeper and neighbor not in merge_target:
                    B[keeper, neighbor] = 1
                    B[neighbor, keeper] = 1

    # Remove nodes from matrices
    nodes_to_keep = np.array(
        [i for i in range(B.shape[0]) if i not in merge_target], dtype=int
    )

    B = B[np.ix_(nodes_to_keep, nodes_to_keep)]
    F = F[:, nodes_to_keep]
    R = R[:, nodes_to_keep]

    # Normalize R so that every row sums to one again
    Rsum = R.sum(axis=1)
    Rsum[Rsum == 0] = 1  # Avoid division by zero
    R = R / Rsum.reshape(-1, 1)

    # Update graph structures
    g = igraph.Graph.Adjacency((B > 0).tolist(), mode="undirected")
    degrees = np.array(g.degree())
    tips = np.argwhere(degrees == 1).flatten()
    forks = np.argwhere(degrees > 2).flatten()

    # Update adata
    adata.uns["graph"]["B"] = B
    adata.uns["graph"]["F"] = F
    adata.uns["graph"]["tips"] = tips
    adata.uns["graph"]["forks"] = forks
    adata.obsm["X_R"] = R

    # Create mapping from old to new indices
    old_to_new = {int(old): new for new, old in enumerate(nodes_to_keep)}

    # Update milestones: keep only those whose node still exists
    if "milestones" in graph:
        adata.uns["graph"]["milestones"] = {
            name: old_to_new[old_idx]
            for name, old_idx in graph["milestones"].items()
            if old_idx in old_to_new
        }

    # Update root. If the stored root was merged away, follow it to the node
    # it was merged into instead of leaving a stale index behind.
    if "root" in graph:
        kept_root = _resolve_merge_target(graph["root"], merge_target)
        if kept_root in old_to_new:
            adata.uns["graph"]["root"] = old_to_new[kept_root]

    # Recalculate pseudotime with new graph structure
    root(adata, adata.uns["graph"]["root"])
    pseudotime(adata)

    n_removed = len(merge_target)
    logg.info(" finished", time=True, end=" " if settings.verbosity > 2 else "\n ")
    logg.hint(f"merged {n_removed} nodes from {len(empty_segs)} empty segment(s)")

    return adata if copy else None


def _resolve_merge_target(node, merge_target):
    """Follow `merge_target` links from `node` until a node that is kept.

    `merge_target` maps each removed node to the node it collapses into. A
    node that is not a key of the mapping is kept and is returned unchanged.
    Raises `ValueError` if the links loop back on themselves, since no node
    of such a loop would survive the merge.
    """
    visited = set()
    while node in merge_target:
        if node in visited:
            raise ValueError(
                "Cannot merge empty segments: merge relations form a cycle."
            )
        visited.add(node)
        node = merge_target[node]
    return node
742+
592743def getpath (adata , root_milestone , milestones , include_root = False ):
593744 """\
594745 Obtain dataframe of cell of a given path.
0 commit comments