From 4df245eba36f67caba550c1f46e35c4625ef4743 Mon Sep 17 00:00:00 2001 From: Akhilesh Halageri Date: Thu, 29 May 2025 14:53:58 +0000 Subject: [PATCH 01/14] feat: stitching support in v3 --- pychunkedgraph/graph/chunkedgraph.py | 3 +- pychunkedgraph/graph/edges/__init__.py | 10 ++- pychunkedgraph/graph/edits.py | 55 ++++++++++----- pychunkedgraph/graph/operation.py | 73 ++++++++++++-------- pychunkedgraph/graph/utils/serializers.py | 20 +++--- pychunkedgraph/ingest/create/atomic_layer.py | 16 +++-- pychunkedgraph/ingest/create/parent_layer.py | 4 +- pychunkedgraph/ingest/ran_agglomeration.py | 6 +- pychunkedgraph/meshing/meshgen.py | 3 + requirements.in | 7 +- requirements.txt | 46 ++++-------- 11 files changed, 134 insertions(+), 109 deletions(-) diff --git a/pychunkedgraph/graph/chunkedgraph.py b/pychunkedgraph/graph/chunkedgraph.py index 183420979..0cdfb1885 100644 --- a/pychunkedgraph/graph/chunkedgraph.py +++ b/pychunkedgraph/graph/chunkedgraph.py @@ -1,5 +1,4 @@ # pylint: disable=invalid-name, missing-docstring, too-many-lines, import-outside-toplevel, unsupported-binary-operation - import time import typing import datetime @@ -778,6 +777,7 @@ def add_edges( source_coords: typing.Sequence[int] = None, sink_coords: typing.Sequence[int] = None, allow_same_segment_merge: typing.Optional[bool] = False, + stitch_mode: typing.Optional[bool] = False, ) -> operation.GraphEditOperation.Result: """ Adds an edge to the chunkedgraph @@ -794,6 +794,7 @@ def add_edges( source_coords=source_coords, sink_coords=sink_coords, allow_same_segment_merge=allow_same_segment_merge, + stitch_mode=stitch_mode, ).execute() def remove_edges( diff --git a/pychunkedgraph/graph/edges/__init__.py b/pychunkedgraph/graph/edges/__init__.py index 16c3ec557..cb07212a5 100644 --- a/pychunkedgraph/graph/edges/__init__.py +++ b/pychunkedgraph/graph/edges/__init__.py @@ -54,22 +54,20 @@ def __init__( affinities: Optional[np.ndarray] = None, areas: Optional[np.ndarray] = None, ): - self.node_ids1 = np.array(node_ids1, dtype=basetypes.NODE_ID, copy=False) - self.node_ids2 = np.array(node_ids2, dtype=basetypes.NODE_ID, copy=False) + self.node_ids1 = np.array(node_ids1, dtype=basetypes.NODE_ID) + self.node_ids2 = np.array(node_ids2, dtype=basetypes.NODE_ID) assert self.node_ids1.size == self.node_ids2.size self._as_pairs = None if affinities is not None and len(affinities) > 0: - self._affinities = np.array( - affinities, dtype=basetypes.EDGE_AFFINITY, copy=False - ) + self._affinities = np.array(affinities, dtype=basetypes.EDGE_AFFINITY) assert self.node_ids1.size == self._affinities.size else: self._affinities = np.full(len(self.node_ids1), DEFAULT_AFFINITY) if areas is not None and len(areas) > 0: - self._areas = np.array(areas, dtype=basetypes.EDGE_AREA, copy=False) + self._areas = np.array(areas, dtype=basetypes.EDGE_AREA) assert self.node_ids1.size == self._areas.size else: self._areas = np.full(len(self.node_ids1), DEFAULT_AREA) diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py index e4a052919..b95694849 100644 --- a/pychunkedgraph/graph/edits.py +++ b/pychunkedgraph/graph/edits.py @@ -68,7 +68,9 @@ def _analyze_affected_edges( def _get_relevant_components(edges: np.ndarray, supervoxels: np.ndarray) -> Tuple: - edges = np.concatenate([edges, np.vstack([supervoxels, supervoxels]).T]) + edges = np.concatenate([edges, np.vstack([supervoxels, supervoxels]).T]).astype( + basetypes.NODE_ID + ) graph, _, _, graph_ids = flatgraph.build_gt_graph(edges, make_directed=True) ccs = 
flatgraph.connected_components(graph) relevant_ccs = [] @@ -107,8 +109,10 @@ def merge_preprocess( active_edges.append(active) inactive_edges.append(inactive) - relevant_ccs = _get_relevant_components(np.concatenate(active_edges), supervoxels) - inactive = np.concatenate(inactive_edges) + relevant_ccs = _get_relevant_components( + np.concatenate(active_edges).astype(basetypes.NODE_ID), supervoxels + ) + inactive = np.concatenate(inactive_edges).astype(basetypes.NODE_ID) _inactive = [types.empty_2d] # source to sink edges source_mask = np.in1d(inactive[:, 0], relevant_ccs[0]) @@ -119,7 +123,7 @@ def merge_preprocess( sink_mask = np.in1d(inactive[:, 1], relevant_ccs[0]) source_mask = np.in1d(inactive[:, 0], relevant_ccs[1]) _inactive.append(inactive[source_mask & sink_mask]) - _inactive = np.concatenate(_inactive) + _inactive = np.concatenate(_inactive).astype(basetypes.NODE_ID) return np.unique(_inactive, axis=0) if _inactive.size else types.empty_2d @@ -187,14 +191,15 @@ def add_edges( time_stamp: datetime.datetime = None, parent_ts: datetime.datetime = None, allow_same_segment_merge=False, + stitch_mode: bool = False, ): edges, l2_cross_edges_d = _analyze_affected_edges( cg, atomic_edges, parent_ts=parent_ts ) l2ids = np.unique(edges) - if not allow_same_segment_merge: + if not allow_same_segment_merge and not stitch_mode: roots = cg.get_roots(l2ids, assert_roots=True, time_stamp=parent_ts) - assert np.unique(roots).size == 2, "L2 IDs must belong to different roots." + assert np.unique(roots).size >= 2, "L2 IDs must belong to different roots." new_old_id_d = defaultdict(set) old_new_id_d = defaultdict(set) @@ -217,7 +222,9 @@ def add_edges( # update cache # map parent to new merged children and vice versa - merged_children = np.concatenate([atomic_children_d[l2id] for l2id in l2ids_]) + merged_children = np.concatenate( + [atomic_children_d[l2id] for l2id in l2ids_] + ).astype(basetypes.NODE_ID) cg.cache.children_cache[new_id] = merged_children cache_utils.update(cg.cache.parents_cache, merged_children, new_id) @@ -244,6 +251,7 @@ def add_edges( operation_id=operation_id, time_stamp=time_stamp, parent_ts=parent_ts, + stitch_mode=stitch_mode, ) new_roots = create_parents.run() @@ -285,9 +293,8 @@ def _split_l2_agglomeration( cross_edges = cross_edges[~in2d(cross_edges, removed_edges)] isolated_ids = agg.supervoxels[~np.in1d(agg.supervoxels, chunk_edges)] isolated_edges = np.column_stack((isolated_ids, isolated_ids)) - graph, _, _, graph_ids = flatgraph.build_gt_graph( - np.concatenate([chunk_edges, isolated_edges]), make_directed=True - ) + _edges = np.concatenate([chunk_edges, isolated_edges]).astype(basetypes.NODE_ID) + graph, _, _, graph_ids = flatgraph.build_gt_graph(_edges, make_directed=True) return flatgraph.connected_components(graph), graph_ids, cross_edges @@ -331,7 +338,7 @@ def remove_edges( old_hierarchy_d = _init_old_hierarchy(cg, l2ids, parent_ts=parent_ts) chunk_id_map = dict(zip(l2ids.tolist(), cg.get_chunk_ids_from_node_ids(l2ids))) - removed_edges = np.concatenate([atomic_edges, atomic_edges[:, ::-1]], axis=0) + removed_edges = np.concatenate([atomic_edges, atomic_edges[:, ::-1]], axis=0).astype(basetypes.NODE_ID) new_l2_ids = [] for id_ in l2ids: agg = l2id_agglomeration_d[id_] @@ -391,7 +398,7 @@ def _get_flipped_ids(id_map, node_ids): for id_ in node_ids ] ids.append(types.empty_1d) # concatenate needs at least one array - return np.concatenate(ids) + return np.concatenate(ids).astype(basetypes.NODE_ID) def _get_descendants(cg, new_id): @@ -443,7 +450,7 @@ def 
_update_neighbor_cross_edges_single( edges = fastremap.remap(edges, node_map, preserve_missing_labels=True) if layer == counterpart_layer: reverse_edge = np.array([counterpart, new_id], dtype=basetypes.NODE_ID) - edges = np.concatenate([edges, [reverse_edge]]) + edges = np.concatenate([edges, [reverse_edge]]).astype(basetypes.NODE_ID) descendants = _get_descendants(cg, new_id) mask = np.isin(edges[:, 1], descendants) if np.any(mask): @@ -510,6 +517,7 @@ def __init__( old_new_id_d: Dict[np.uint64, Set[np.uint64]] = None, old_hierarchy_d: Dict[np.uint64, Dict[int, np.uint64]] = None, parent_ts: datetime.datetime = None, + stitch_mode: bool = False, ): self.cg = cg self.new_entries = [] @@ -521,6 +529,7 @@ def __init__( self._operation_id = operation_id self._time_stamp = time_stamp self._last_successful_ts = parent_ts + self.stitch_mode = stitch_mode def _update_id_lineage( self, @@ -552,7 +561,7 @@ def _get_connected_components(self, node_ids: np.ndarray, layer: int): for id_ in node_ids: edges_ = cross_edges_d[id_].get(layer, types.empty_2d) cx_edges.append(edges_) - cx_edges = np.concatenate([*cx_edges, np.vstack([node_ids, node_ids]).T]) + cx_edges = np.concatenate([*cx_edges, np.vstack([node_ids, node_ids]).T]).astype(basetypes.NODE_ID) graph, _, _, graph_ids = flatgraph.build_gt_graph(cx_edges, make_directed=True) return flatgraph.connected_components(graph), graph_ids @@ -568,7 +577,7 @@ def _get_layer_node_ids( mask = np.in1d(siblings, old_ids) node_ids = np.concatenate( [_get_flipped_ids(self._old_new_id_d, old_ids), siblings[~mask], new_ids] - ) + ).astype(basetypes.NODE_ID) node_ids = np.unique(node_ids) layer_mask = self.cg.get_chunk_layers(node_ids) == layer return node_ids[layer_mask] @@ -635,10 +644,16 @@ def _create_new_parents(self, layer: int): if len(cx_edges_d[cc_ids[0]].get(l, types.empty_2d)) > 0: parent_layer = l break - parent = self.cg.id_client.create_node_id( - self.cg.get_parent_chunk_id(cc_ids[0], parent_layer), - root_chunk=parent_layer == self.cg.meta.layer_count, - ) + + while True: + parent = self.cg.id_client.create_node_id( + self.cg.get_parent_chunk_id(cc_ids[0], parent_layer), + root_chunk=parent_layer == self.cg.meta.layer_count, + ) + _entry = self.cg.client.read_node(parent) + if _entry == {}: + break + self._new_ids_d[parent_layer].append(parent) self._update_id_lineage(parent, cc_ids, layer, parent_layer) self.cg.cache.children_cache[parent] = cc_ids @@ -689,6 +704,8 @@ def run(self) -> Iterable: return self._new_ids_d[self.cg.meta.layer_count] def _update_root_id_lineage(self): + if self.stitch_mode: + return new_roots = self._new_ids_d[self.cg.meta.layer_count] former_roots = _get_flipped_ids(self._new_old_id_d, new_roots) former_roots = np.unique(former_roots) diff --git a/pychunkedgraph/graph/operation.py b/pychunkedgraph/graph/operation.py index 8c5d4484e..a07045cff 100644 --- a/pychunkedgraph/graph/operation.py +++ b/pychunkedgraph/graph/operation.py @@ -555,6 +555,7 @@ class MergeOperation(GraphEditOperation): "affinities", "bbox_offset", "allow_same_segment_merge", + "stitch_mode", ] def __init__( @@ -568,6 +569,7 @@ def __init__( bbox_offset: Tuple[int, int, int] = (240, 240, 24), affinities: Optional[Sequence[np.float32]] = None, allow_same_segment_merge: Optional[bool] = False, + stitch_mode: bool = False, ) -> None: super().__init__( cg, user_id=user_id, source_coords=source_coords, sink_coords=sink_coords @@ -575,6 +577,7 @@ def __init__( self.added_edges = np.atleast_2d(added_edges).astype(basetypes.NODE_ID) self.bbox_offset = 
np.atleast_1d(bbox_offset).astype(basetypes.COORDINATES) self.allow_same_segment_merge = allow_same_segment_merge + self.stitch_mode = stitch_mode self.affinities = None if affinities is not None: @@ -605,34 +608,43 @@ def _apply( ) ) if len(root_ids) < 2 and not self.allow_same_segment_merge: - raise PreconditionError("Supervoxels must belong to different objects.") - bbox = get_bbox(self.source_coords, self.sink_coords, self.bbox_offset) - with TimeIt("subgraph", self.cg.graph_id, operation_id): - edges = self.cg.get_subgraph( - root_ids, - bbox=bbox, - bbox_is_coordinate=True, - edges_only=True, + raise PreconditionError( + "Supervoxels must belong to different objects." + f" Tried to merge {self.added_edges.ravel()}," + f" which all belong to {tuple(root_ids)[0]}." ) - if self.allow_same_segment_merge: - inactive_edges = types.empty_2d - else: - with TimeIt("preprocess", self.cg.graph_id, operation_id): - inactive_edges = edits.merge_preprocess( - self.cg, - subgraph_edges=edges, - supervoxels=self.added_edges.ravel(), - parent_ts=self.parent_ts, + atomic_edges = self.added_edges + fake_edge_rows = [] + if not self.stitch_mode: + bbox = get_bbox(self.source_coords, self.sink_coords, self.bbox_offset) + with TimeIt("subgraph", self.cg.graph_id, operation_id): + edges = self.cg.get_subgraph( + root_ids, + bbox=bbox, + bbox_is_coordinate=True, + edges_only=True, ) - atomic_edges, fake_edge_rows = edits.check_fake_edges( - self.cg, - atomic_edges=self.added_edges, - inactive_edges=inactive_edges, - time_stamp=timestamp, - parent_ts=self.parent_ts, - ) + if self.allow_same_segment_merge: + inactive_edges = types.empty_2d + else: + with TimeIt("preprocess", self.cg.graph_id, operation_id): + inactive_edges = edits.merge_preprocess( + self.cg, + subgraph_edges=edges, + supervoxels=self.added_edges.ravel(), + parent_ts=self.parent_ts, + ) + + atomic_edges, fake_edge_rows = edits.check_fake_edges( + self.cg, + atomic_edges=self.added_edges, + inactive_edges=inactive_edges, + time_stamp=timestamp, + parent_ts=self.parent_ts, + ) + with TimeIt("add_edges", self.cg.graph_id, operation_id): new_roots, new_l2_ids, new_entries = edits.add_edges( self.cg, @@ -641,6 +653,7 @@ def _apply( time_stamp=timestamp, parent_ts=self.parent_ts, allow_same_segment_merge=self.allow_same_segment_merge, + stitch_mode=self.stitch_mode, ) return new_roots, new_l2_ids, fake_edge_rows + new_entries @@ -857,12 +870,14 @@ def __init__( "try placing the points further apart." ) - ids = np.concatenate([self.source_ids, self.sink_ids]) + ids = np.concatenate([self.source_ids, self.sink_ids]).astype(basetypes.NODE_ID) layers = self.cg.get_chunk_layers(ids) assert np.sum(layers) == layers.size, "IDs must be supervoxels." 
def _update_root_ids(self) -> np.ndarray: - sink_and_source_ids = np.concatenate((self.source_ids, self.sink_ids)) + sink_and_source_ids = np.concatenate((self.source_ids, self.sink_ids)).astype( + basetypes.NODE_ID + ) root_ids = np.unique( self.cg.get_roots( sink_and_source_ids, assert_roots=True, time_stamp=self.parent_ts @@ -878,7 +893,9 @@ def _apply( # Verify that sink and source are from the same root object root_ids = set( self.cg.get_roots( - np.concatenate([self.source_ids, self.sink_ids]), + np.concatenate([self.source_ids, self.sink_ids]).astype( + basetypes.NODE_ID + ), assert_roots=True, time_stamp=self.parent_ts, ) @@ -899,7 +916,7 @@ def _apply( edges = reduce(lambda x, y: x + y, edges_tuple, Edges([], [])) supervoxels = np.concatenate( [agg.supervoxels for agg in l2id_agglomeration_d.values()] - ) + ).astype(basetypes.NODE_ID) mask0 = np.in1d(edges.node_ids1, supervoxels) mask1 = np.in1d(edges.node_ids2, supervoxels) edges = edges[mask0 & mask1] diff --git a/pychunkedgraph/graph/utils/serializers.py b/pychunkedgraph/graph/utils/serializers.py index 09c0f63b0..3b0101d86 100644 --- a/pychunkedgraph/graph/utils/serializers.py +++ b/pychunkedgraph/graph/utils/serializers.py @@ -41,7 +41,9 @@ def _deserialize(val, dtype, shape=None, order=None): def __init__(self, dtype, shape=None, order=None, compression_level=None): super().__init__( - serializer=lambda x: x.newbyteorder(dtype.byteorder).tobytes(), + serializer=lambda x: x.view( + x.dtype.newbyteorder(dtype.byteorder) + ).tobytes(), deserializer=lambda x: NumPyArray._deserialize( x, dtype, shape=shape, order=order ), @@ -53,7 +55,9 @@ def __init__(self, dtype, shape=None, order=None, compression_level=None): class NumPyValue(_Serializer): def __init__(self, dtype): super().__init__( - serializer=lambda x: x.newbyteorder(dtype.byteorder).tobytes(), + serializer=lambda x: x.view( + x.dtype.newbyteorder(dtype.byteorder) + ).tobytes(), deserializer=lambda x: np.frombuffer(x, dtype=dtype)[0], basetype=dtype.type, ) @@ -96,7 +100,7 @@ def __init__(self): def pad_node_id(node_id: np.uint64) -> str: - """ Pad node id to 20 digits + """Pad node id to 20 digits :param node_id: int :return: str @@ -105,7 +109,7 @@ def pad_node_id(node_id: np.uint64) -> str: def serialize_uint64(node_id: np.uint64, counter=False, fake_edges=False) -> bytes: - """ Serializes an id to be ingested by a bigtable table row + """Serializes an id to be ingested by a bigtable table row :param node_id: int :return: str @@ -118,7 +122,7 @@ def serialize_uint64(node_id: np.uint64, counter=False, fake_edges=False) -> byt def serialize_uint64s_to_regex(node_ids: Iterable[np.uint64]) -> bytes: - """ Serializes an id to be ingested by a bigtable table row + """Serializes an id to be ingested by a bigtable table row :param node_id: int :return: str @@ -128,7 +132,7 @@ def serialize_uint64s_to_regex(node_ids: Iterable[np.uint64]) -> bytes: def deserialize_uint64(node_id: bytes, fake_edges=False) -> np.uint64: - """ De-serializes a node id from a BigTable row + """De-serializes a node id from a BigTable row :param node_id: bytes :return: np.uint64 @@ -139,7 +143,7 @@ def deserialize_uint64(node_id: bytes, fake_edges=False) -> np.uint64: def serialize_key(key: str) -> bytes: - """ Serializes a key to be ingested by a bigtable table row + """Serializes a key to be ingested by a bigtable table row :param key: str :return: bytes @@ -148,7 +152,7 @@ def serialize_key(key: str) -> bytes: def deserialize_key(key: bytes) -> str: - """ Deserializes a row key + """Deserializes a 
row key :param key: bytes :return: str diff --git a/pychunkedgraph/ingest/create/atomic_layer.py b/pychunkedgraph/ingest/create/atomic_layer.py index 0a7aae728..e235d36d4 100644 --- a/pychunkedgraph/ingest/create/atomic_layer.py +++ b/pychunkedgraph/ingest/create/atomic_layer.py @@ -68,8 +68,10 @@ def _get_chunk_nodes_and_edges(chunk_edges_d: dict, isolated_ids: Sequence[int]) in-chunk edges and nodes_ids """ isolated_nodes_self_edges = np.vstack([isolated_ids, isolated_ids]).T - node_ids = [isolated_ids] - edge_ids = [isolated_nodes_self_edges] + node_ids = [isolated_ids] if len(isolated_ids) != 0 else [] + edge_ids = ( + [isolated_nodes_self_edges] if len(isolated_nodes_self_edges) != 0 else [] + ) for edge_type in EDGE_TYPES: edges = chunk_edges_d[edge_type] node_ids.append(edges.node_ids1) @@ -77,9 +79,9 @@ def _get_chunk_nodes_and_edges(chunk_edges_d: dict, isolated_ids: Sequence[int]) node_ids.append(edges.node_ids2) edge_ids.append(edges.get_pairs()) - chunk_node_ids = np.unique(np.concatenate(node_ids)) + chunk_node_ids = np.unique(np.concatenate(node_ids).astype(basetypes.NODE_ID)) edge_ids.append(np.vstack([chunk_node_ids, chunk_node_ids]).T) - return (chunk_node_ids, np.concatenate(edge_ids)) + return (chunk_node_ids, np.concatenate(edge_ids).astype(basetypes.NODE_ID)) def _get_remapping(chunk_edges_d: dict): @@ -116,7 +118,7 @@ def _process_component( r_key = serializers.serialize_uint64(node_id) nodes.append(cg.client.mutate_row(r_key, val_dict, time_stamp=time_stamp)) - chunk_out_edges = np.concatenate(chunk_out_edges) + chunk_out_edges = np.concatenate(chunk_out_edges).astype(basetypes.NODE_ID) cce_layers = cg.get_cross_chunk_edges_layer(chunk_out_edges) u_cce_layers = np.unique(cce_layers) @@ -147,5 +149,7 @@ def _get_outgoing_edges(node_id, chunk_edges_d, sparse_indices, remapping): ] row_ids = row_ids[column_ids == 0] # edges that this node is part of - chunk_out_edges = np.concatenate([chunk_out_edges, edges[row_ids]]) + chunk_out_edges = np.concatenate([chunk_out_edges, edges[row_ids]]).astype( + basetypes.NODE_ID + ) return chunk_out_edges diff --git a/pychunkedgraph/ingest/create/parent_layer.py b/pychunkedgraph/ingest/create/parent_layer.py index 90b24d26a..dfdb48dac 100644 --- a/pychunkedgraph/ingest/create/parent_layer.py +++ b/pychunkedgraph/ingest/create/parent_layer.py @@ -73,7 +73,7 @@ def _read_children_chunks( children_ids = [types.empty_1d] for child_coord in children_coords: children_ids.append(_read_chunk([], cg, layer_id - 1, child_coord)) - return np.concatenate(children_ids) + return np.concatenate(children_ids).astype(basetypes.NODE_ID) with mp.Manager() as manager: children_ids_shared = manager.list() @@ -92,7 +92,7 @@ def _read_children_chunks( multi_args, n_threads=min(len(multi_args), mp.cpu_count()), ) - return np.concatenate(children_ids_shared) + return np.concatenate(children_ids_shared).astype(basetypes.NODE_ID) def _read_chunk_helper(args): diff --git a/pychunkedgraph/ingest/ran_agglomeration.py b/pychunkedgraph/ingest/ran_agglomeration.py index a0ca42d54..d726ba4a5 100644 --- a/pychunkedgraph/ingest/ran_agglomeration.py +++ b/pychunkedgraph/ingest/ran_agglomeration.py @@ -314,7 +314,9 @@ def get_active_edges(edges_d, mapping): if edge_type == EDGE_TYPES.in_chunk: pseudo_isolated_ids.append(edges.node_ids2) - return chunk_edges_active, np.unique(np.concatenate(pseudo_isolated_ids)) + return chunk_edges_active, np.unique( + np.concatenate(pseudo_isolated_ids).astype(basetypes.NODE_ID) + ) def define_active_edges(edge_dict, mapping) -> 
Union[Dict, np.ndarray]: @@ -380,7 +382,7 @@ def read_raw_agglomeration_data(imanager: IngestionManager, chunk_coord: np.ndar edges_list = _read_agg_files(filenames, chunk_ids, path) G = nx.Graph() - G.add_edges_from(np.concatenate(edges_list)) + G.add_edges_from(np.concatenate(edges_list).astype(basetypes.NODE_ID)) mapping = {} components = list(nx.connected_components(G)) for i_cc, cc in enumerate(components): diff --git a/pychunkedgraph/meshing/meshgen.py b/pychunkedgraph/meshing/meshgen.py index a8da89b1f..1fd4cf5b4 100644 --- a/pychunkedgraph/meshing/meshgen.py +++ b/pychunkedgraph/meshing/meshgen.py @@ -935,6 +935,7 @@ def chunk_initial_mesh_task( cv = CloudVolume( f"graphene://https://localhost/segmentation/table/dummy", info=meshgen_utils.get_json_info(cg), + secrets={"token": "dummy"}, ) sharding_info = cv.mesh.meta.info["sharding"]["2"] sharding_spec = ShardingSpecification.from_dict(sharding_info) @@ -1123,6 +1124,7 @@ def chunk_stitch_remeshing_task( f"graphene://https://localhost/segmentation/table/dummy", mesh_dir=cv_sharded_mesh_dir, info=meshgen_utils.get_json_info(cg), + secrets={"token": "dummy"}, ) fragments_in_batch_processed = 0 @@ -1257,6 +1259,7 @@ def chunk_initial_sharded_stitching_task( cv = CloudVolume( f"graphene://https://localhost/segmentation/table/dummy", info=meshgen_utils.get_json_info(cg), + secrets={"token": "dummy"}, ) shard_filenames = [] shard_to_chunk_id = {} diff --git a/requirements.in b/requirements.in index 4fcd353ed..ec2ee3c29 100644 --- a/requirements.in +++ b/requirements.in @@ -15,18 +15,17 @@ rq>2 pyyaml cachetools werkzeug -tensorstore # PyPI only: -cloud-files>=4.21.1 -cloud-volume>=8.26.0 +cloud-files>=5.3.0 +cloud-volume>=12.2.0 multiwrapper middle-auth-client>=3.11.0 zmesh>=1.7.0 fastremap>=1.14.0 task-queue>=2.13.0 messagingclient -dracopy>=1.3.0 +dracopy>=1.5.0 datastoreflex>=0.5.0 zstandard==0.21.0 diff --git a/requirements.txt b/requirements.txt index 0eedacb31..4ab8f90c1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -41,22 +41,18 @@ click==8.1.7 # -r requirements.in # cloud-files # compressed-segmentation - # compresso # flask + # microviewer # rq # task-queue -cloud-files==4.21.1 +cloud-files==5.3.0 # via # -r requirements.in # cloud-volume # datastoreflex -cloud-volume==8.26.0 +cloud-volume==12.2.0 # via -r requirements.in -compressed-segmentation==2.2.1 - # via cloud-volume -compresso==3.2.1 - # via cloud-volume -crackle-codec==0.7.0 +compressed-segmentation==2.3.2 # via cloud-volume crc32c==2.3.post0 # via cloud-files @@ -68,7 +64,7 @@ dill==0.3.7 # via # multiprocess # pathos -dracopy==1.3.0 +dracopy==1.5.0 # via # -r requirements.in # cloud-volume @@ -78,7 +74,7 @@ fastremap==1.14.0 # via # -r requirements.in # cloud-volume - # crackle-codec + # osteoid flask==2.3.3 # via # -r requirements.in @@ -86,8 +82,6 @@ flask==2.3.3 # middle-auth-client flask-cors==4.0.0 # via -r requirements.in -fpzip==1.2.2 - # via cloud-volume furl==2.1.3 # via middle-auth-client gevent==23.9.1 @@ -189,10 +183,10 @@ markupsafe==2.1.3 # werkzeug messagingclient==0.1.3 # via -r requirements.in +microviewer==1.13.1 + # via cloud-volume middle-auth-client==3.16.1 # via -r requirements.in -ml-dtypes==0.3.2 - # via tensorstore multiprocess==0.70.15 # via pathos multiwrapper==0.1.1 @@ -201,24 +195,20 @@ networkx==3.1 # via # -r requirements.in # cloud-volume + # osteoid numpy==1.26.0 # via # -r requirements.in # cloud-volume # compressed-segmentation - # compresso - # crackle-codec # fastremap - # fpzip # messagingclient - # ml-dtypes + # 
microviewer # multiwrapper + # osteoid # pandas - # pyspng-seunglab # simplejpeg # task-queue - # tensorstore - # zfpc # zmesh orderedmultidict==1.0.1 # via furl @@ -226,6 +216,8 @@ orjson==3.9.7 # via # cloud-files # task-queue +osteoid==0.3.1 + # via cloud-volume packaging==23.1 # via pytest pandas==2.1.1 @@ -237,8 +229,6 @@ pathos==0.3.1 # task-queue pbr==5.11.1 # via task-queue -pillow==10.0.1 - # via cloud-volume pluggy==1.3.0 # via pytest posix-ipc==1.1.1 @@ -273,12 +263,8 @@ pyasn1==0.5.0 # rsa pyasn1-modules==0.3.0 # via google-auth -pybind11==2.11.1 - # via crackle-codec pysimdjson==5.0.2 # via cloud-volume -pyspng-seunglab==1.1.0 - # via cloud-volume pytest==7.4.2 # via compressed-segmentation python-dateutil==2.8.2 @@ -340,8 +326,6 @@ tenacity==8.2.3 # cloud-files # cloud-volume # task-queue -tensorstore==0.1.53 - # via -r requirements.in tqdm==4.66.1 # via # cloud-files @@ -360,10 +344,6 @@ werkzeug==2.3.8 # via # -r requirements.in # flask -zfpc==0.1.2 - # via cloud-volume -zfpy==1.0.0 - # via zfpc zmesh==1.7.0 # via -r requirements.in zope-event==5.0 From 589ca526e136a27f19ace6ff8f1e25dca03de011 Mon Sep 17 00:00:00 2001 From: Akhilesh Halageri Date: Thu, 29 May 2025 15:05:01 +0000 Subject: [PATCH 02/14] fix: add tensorstore to req --- requirements.in | 1 + requirements.txt | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/requirements.in b/requirements.in index ec2ee3c29..bf735af22 100644 --- a/requirements.in +++ b/requirements.in @@ -15,6 +15,7 @@ rq>2 pyyaml cachetools werkzeug +tensorstore # PyPI only: cloud-files>=5.3.0 diff --git a/requirements.txt b/requirements.txt index 4ab8f90c1..35014d4de 100644 --- a/requirements.txt +++ b/requirements.txt @@ -187,6 +187,8 @@ microviewer==1.13.1 # via cloud-volume middle-auth-client==3.16.1 # via -r requirements.in +ml-dtypes==0.5.1 + # via tensorstore multiprocess==0.70.15 # via pathos multiwrapper==0.1.1 @@ -204,11 +206,13 @@ numpy==1.26.0 # fastremap # messagingclient # microviewer + # ml-dtypes # multiwrapper # osteoid # pandas # simplejpeg # task-queue + # tensorstore # zmesh orderedmultidict==1.0.1 # via furl @@ -326,6 +330,8 @@ tenacity==8.2.3 # cloud-files # cloud-volume # task-queue +tensorstore==0.1.75 + # via -r requirements.in tqdm==4.66.1 # via # cloud-files From 77073341502aa10cc6c9a30093f1c53a46adae31 Mon Sep 17 00:00:00 2001 From: Dodam Ih Date: Thu, 29 May 2025 18:54:27 -0700 Subject: [PATCH 03/14] fix: more numpy 2 fixes --- pychunkedgraph/graph/chunks/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pychunkedgraph/graph/chunks/utils.py b/pychunkedgraph/graph/chunks/utils.py index 3b6e19665..d597e688e 100644 --- a/pychunkedgraph/graph/chunks/utils.py +++ b/pychunkedgraph/graph/chunks/utils.py @@ -98,7 +98,7 @@ def get_chunk_coordinates_multiple(meta, ids: np.ndarray) -> np.ndarray: y_offset = x_offset - bits_per_dim z_offset = y_offset - bits_per_dim - ids = np.array(ids, dtype=int, copy=False) + ids = np.asarray(ids, dtype=int) X = ids >> x_offset & 2**bits_per_dim - 1 Y = ids >> y_offset & 2**bits_per_dim - 1 Z = ids >> z_offset & 2**bits_per_dim - 1 @@ -153,7 +153,7 @@ def get_chunk_ids_from_node_ids(meta, ids: Iterable[np.uint64]) -> np.ndarray: bits_per_dims = np.array([meta.bitmasks[l] for l in get_chunk_layers(meta, ids)]) offsets = 64 - meta.graph_config.LAYER_ID_BITS - 3 * bits_per_dims - ids = np.array(ids, dtype=int, copy=False) + ids = np.asarray(ids, dtype=int) cids1 = np.array((ids >> offsets) << offsets, dtype=np.uint64) # cids2 = 
np.vectorize(get_chunk_id)(meta, ids) # assert np.all(cids1 == cids2) From 6642b7b73857440fa7146960fce499ad70aade9f Mon Sep 17 00:00:00 2001 From: Dodam Ih Date: Fri, 30 May 2025 15:05:42 -0700 Subject: [PATCH 04/14] fix: more numpy2 fixes --- pychunkedgraph/graph/cache.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pychunkedgraph/graph/cache.py b/pychunkedgraph/graph/cache.py index 13fa962ae..2cde01723 100644 --- a/pychunkedgraph/graph/cache.py +++ b/pychunkedgraph/graph/cache.py @@ -79,7 +79,7 @@ def cross_edges_decorated(node_id): return cross_edges_decorated(node_id) def parents_multiple(self, node_ids: np.ndarray, *, time_stamp: datetime = None): - node_ids = np.array(node_ids, dtype=NODE_ID, copy=False) + node_ids = np.asarray(node_ids, dtype=NODE_ID) if not node_ids.size: return node_ids mask = np.in1d(node_ids, np.fromiter(self.parents_cache.keys(), dtype=NODE_ID)) @@ -93,7 +93,7 @@ def parents_multiple(self, node_ids: np.ndarray, *, time_stamp: datetime = None) def children_multiple(self, node_ids: np.ndarray, *, flatten=False): result = {} - node_ids = np.array(node_ids, dtype=NODE_ID, copy=False) + node_ids = np.asarray(node_ids, dtype=NODE_ID) if not node_ids.size: return result mask = np.in1d(node_ids, np.fromiter(self.children_cache.keys(), dtype=NODE_ID)) @@ -111,7 +111,7 @@ def cross_chunk_edges_multiple( self, node_ids: np.ndarray, *, time_stamp: datetime = None ): result = {} - node_ids = np.array(node_ids, dtype=NODE_ID, copy=False) + node_ids = np.asarray(node_ids, dtype=NODE_ID) if not node_ids.size: return result mask = np.in1d( From f63dc4230ccbbfd2e33aa8956f4c9b7e141c0de6 Mon Sep 17 00:00:00 2001 From: Dodam Ih Date: Fri, 30 May 2025 21:47:14 -0700 Subject: [PATCH 05/14] fix: even more numpy2 fixes --- pychunkedgraph/graph/edits.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py index b95694849..c09f54a3d 100644 --- a/pychunkedgraph/graph/edits.py +++ b/pychunkedgraph/graph/edits.py @@ -394,7 +394,7 @@ def _get_flipped_ids(id_map, node_ids): returns old or new ids according to the map """ ids = [ - np.array(list(id_map[id_]), dtype=basetypes.NODE_ID, copy=False) + np.asarray(list(id_map[id_]), dtype=basetypes.NODE_ID) for id_ in node_ids ] ids.append(types.empty_1d) # concatenate needs at least one array From d31d506798dcbf3df8c4afb26fd5b1f4d4a2c521 Mon Sep 17 00:00:00 2001 From: Dodam Ih Date: Tue, 17 Jun 2025 16:58:12 -0700 Subject: [PATCH 06/14] debug: sanity check in add_atomic_chunk --- pychunkedgraph/ingest/create/atomic_layer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pychunkedgraph/ingest/create/atomic_layer.py b/pychunkedgraph/ingest/create/atomic_layer.py index e235d36d4..eb8d9b43b 100644 --- a/pychunkedgraph/ingest/create/atomic_layer.py +++ b/pychunkedgraph/ingest/create/atomic_layer.py @@ -36,6 +36,8 @@ def add_atomic_chunk( chunk_ids = cg.get_chunk_ids_from_node_ids(chunk_node_ids) assert len(np.unique(chunk_ids)) == 1 + for chunk_id in chunk_ids: + assert not cg.range_read_chunk(cg.get_parent_chunk_id(chunk_id)) graph, _, _, unique_ids = build_gt_graph(chunk_edge_ids, make_directed=True) ccs = connected_components(graph) From da85407623045af45b801e8da5af1f5edce8aa74 Mon Sep 17 00:00:00 2001 From: Akhilesh Halageri Date: Mon, 19 Jan 2026 04:10:51 +0000 Subject: [PATCH 07/14] fix(ingest): remove empty parent chunk assert --- pychunkedgraph/ingest/create/atomic_layer.py | 2 -- 1 file changed, 2 deletions(-) diff --git 
a/pychunkedgraph/ingest/create/atomic_layer.py b/pychunkedgraph/ingest/create/atomic_layer.py index eb8d9b43b..e235d36d4 100644 --- a/pychunkedgraph/ingest/create/atomic_layer.py +++ b/pychunkedgraph/ingest/create/atomic_layer.py @@ -36,8 +36,6 @@ def add_atomic_chunk( chunk_ids = cg.get_chunk_ids_from_node_ids(chunk_node_ids) assert len(np.unique(chunk_ids)) == 1 - for chunk_id in chunk_ids: - assert not cg.range_read_chunk(cg.get_parent_chunk_id(chunk_id)) graph, _, _, unique_ids = build_gt_graph(chunk_edge_ids, make_directed=True) ccs = connected_components(graph) From 1876f56d57089720c82eebbe4d2901e10537d56d Mon Sep 17 00:00:00 2001 From: Dodam Ih Date: Thu, 22 Jan 2026 21:42:44 -0800 Subject: [PATCH 08/14] chore: numpy 2 compatibility --- pychunkedgraph/app/segmentation/common.py | 4 +-- pychunkedgraph/graph/cache.py | 6 ++-- pychunkedgraph/graph/chunkedgraph.py | 4 +-- pychunkedgraph/graph/cutting.py | 44 +++++++++++------------ pychunkedgraph/graph/edits.py | 18 +++++----- pychunkedgraph/graph/misc.py | 4 +-- pychunkedgraph/graph/operation.py | 6 ++-- pychunkedgraph/graph/utils/id_helpers.py | 2 +- pychunkedgraph/meshing/manifest/utils.py | 2 +- pychunkedgraph/meshing/meshgen.py | 6 ++-- pychunkedgraph/utils/general.py | 2 +- 11 files changed, 49 insertions(+), 49 deletions(-) diff --git a/pychunkedgraph/app/segmentation/common.py b/pychunkedgraph/app/segmentation/common.py index 70642c9ce..cd1a0e9b8 100644 --- a/pychunkedgraph/app/segmentation/common.py +++ b/pychunkedgraph/app/segmentation/common.py @@ -791,8 +791,8 @@ def handle_subgraph(table_id, root_id, only_internal_edges=True): supervoxels = np.concatenate( [agg.supervoxels for agg in l2id_agglomeration_d.values()] ) - mask0 = np.in1d(edges.node_ids1, supervoxels) - mask1 = np.in1d(edges.node_ids2, supervoxels) + mask0 = np.isin(edges.node_ids1, supervoxels) + mask1 = np.isin(edges.node_ids2, supervoxels) edges = edges[mask0 & mask1] return edges diff --git a/pychunkedgraph/graph/cache.py b/pychunkedgraph/graph/cache.py index 2cde01723..355a9fbe3 100644 --- a/pychunkedgraph/graph/cache.py +++ b/pychunkedgraph/graph/cache.py @@ -82,7 +82,7 @@ def parents_multiple(self, node_ids: np.ndarray, *, time_stamp: datetime = None) node_ids = np.asarray(node_ids, dtype=NODE_ID) if not node_ids.size: return node_ids - mask = np.in1d(node_ids, np.fromiter(self.parents_cache.keys(), dtype=NODE_ID)) + mask = np.isin(node_ids, np.fromiter(self.parents_cache.keys(), dtype=NODE_ID)) parents = node_ids.copy() parents[mask] = self._parent_vec(node_ids[mask]) parents[~mask] = self._cg.get_parents( @@ -96,7 +96,7 @@ def children_multiple(self, node_ids: np.ndarray, *, flatten=False): node_ids = np.asarray(node_ids, dtype=NODE_ID) if not node_ids.size: return result - mask = np.in1d(node_ids, np.fromiter(self.children_cache.keys(), dtype=NODE_ID)) + mask = np.isin(node_ids, np.fromiter(self.children_cache.keys(), dtype=NODE_ID)) cached_children_ = self._children_vec(node_ids[mask]) result.update({id_: c_ for id_, c_ in zip(node_ids[mask], cached_children_)}) result.update(self._cg.get_children(node_ids[~mask], raw_only=True)) @@ -114,7 +114,7 @@ def cross_chunk_edges_multiple( node_ids = np.asarray(node_ids, dtype=NODE_ID) if not node_ids.size: return result - mask = np.in1d( + mask = np.isin( node_ids, np.fromiter(self.cross_chunk_edges_cache.keys(), dtype=NODE_ID) ) cached_edges_ = self._cross_chunk_edges_vec(node_ids[mask]) diff --git a/pychunkedgraph/graph/chunkedgraph.py b/pychunkedgraph/graph/chunkedgraph.py index 0cdfb1885..4d0324e6d 100644 
--- a/pychunkedgraph/graph/chunkedgraph.py +++ b/pychunkedgraph/graph/chunkedgraph.py @@ -704,8 +704,8 @@ def get_l2_agglomerations( else: all_chunk_edges = all_chunk_edges.get_pairs() supervoxels = self.get_children(level2_ids, flatten=True) - mask0 = np.in1d(all_chunk_edges[:, 0], supervoxels) - mask1 = np.in1d(all_chunk_edges[:, 1], supervoxels) + mask0 = np.isin(all_chunk_edges[:, 0], supervoxels) + mask1 = np.isin(all_chunk_edges[:, 1], supervoxels) return all_chunk_edges[mask0 & mask1] l2id_children_d = self.get_children(level2_ids) diff --git a/pychunkedgraph/graph/cutting.py b/pychunkedgraph/graph/cutting.py index 8b1583871..525402cdc 100644 --- a/pychunkedgraph/graph/cutting.py +++ b/pychunkedgraph/graph/cutting.py @@ -62,7 +62,7 @@ def merge_cross_chunk_edges_graph_tool( if len(mapping) > 0: mapping = np.concatenate(mapping) u_nodes = np.unique(edges) - u_unmapped_nodes = u_nodes[~np.in1d(u_nodes, mapping)] + u_unmapped_nodes = u_nodes[~np.isin(u_nodes, mapping)] unmapped_mapping = np.concatenate( [u_unmapped_nodes.reshape(-1, 1), u_unmapped_nodes.reshape(-1, 1)], axis=1 ) @@ -189,9 +189,9 @@ def _build_gt_graph(self, edges, affs): ) = flatgraph.build_gt_graph(comb_edges, comb_affs, make_directed=True) self.source_graph_ids = np.where( - np.in1d(self.unique_supervoxel_ids, self.sources) + np.isin(self.unique_supervoxel_ids, self.sources) )[0] - self.sink_graph_ids = np.where(np.in1d(self.unique_supervoxel_ids, self.sinks))[ + self.sink_graph_ids = np.where(np.isin(self.unique_supervoxel_ids, self.sinks))[ 0 ] @@ -398,7 +398,7 @@ def _remap_cut_edge_set(self, cut_edge_set): remapped_cutset_flattened_view = remapped_cutset.view(dtype="u8,u8") edges_flattened_view = self.cg_edges.view(dtype="u8,u8") - cutset_mask = np.in1d(remapped_cutset_flattened_view, edges_flattened_view) + cutset_mask = np.isin(remapped_cutset_flattened_view, edges_flattened_view) return remapped_cutset[cutset_mask] @@ -432,8 +432,8 @@ def _get_split_preview_connected_components(self, cut_edge_set): max_sinks = 0 i = 0 for cc in ccs_test_post_cut: - num_sources = np.count_nonzero(np.in1d(self.source_graph_ids, cc)) - num_sinks = np.count_nonzero(np.in1d(self.sink_graph_ids, cc)) + num_sources = np.count_nonzero(np.isin(self.source_graph_ids, cc)) + num_sinks = np.count_nonzero(np.isin(self.sink_graph_ids, cc)) if num_sources > max_sources: max_sources = num_sources max_source_index = i @@ -486,8 +486,8 @@ def _filter_graph_connected_components(self): # If connected component contains no sources or no sinks, # remove its nodes from the mincut computation if not ( - np.any(np.in1d(self.source_graph_ids, cc)) - and np.any(np.in1d(self.sink_graph_ids, cc)) + np.any(np.isin(self.source_graph_ids, cc)) + and np.any(np.isin(self.sink_graph_ids, cc)) ): for node_id in cc: removed[node_id] = True @@ -525,13 +525,13 @@ def _gt_mincut_sanity_check(self, partition): np.array(np.where(partition.a == i_cc)[0], dtype=int) ] - if np.any(np.in1d(self.sources, cc_list)): - assert np.all(np.in1d(self.sources, cc_list)) - assert ~np.any(np.in1d(self.sinks, cc_list)) + if np.any(np.isin(self.sources, cc_list)): + assert np.all(np.isin(self.sources, cc_list)) + assert ~np.any(np.isin(self.sinks, cc_list)) - if np.any(np.in1d(self.sinks, cc_list)): - assert np.all(np.in1d(self.sinks, cc_list)) - assert ~np.any(np.in1d(self.sources, cc_list)) + if np.any(np.isin(self.sinks, cc_list)): + assert np.all(np.isin(self.sinks, cc_list)) + assert ~np.any(np.isin(self.sources, cc_list)) def _sink_and_source_connectivity_sanity_check(self, 
cut_edge_set): """ @@ -555,9 +555,9 @@ def _sink_and_source_connectivity_sanity_check(self, cut_edge_set): illegal_split = False try: for cc in ccs_test_post_cut: - if np.any(np.in1d(self.source_graph_ids, cc)): - assert np.all(np.in1d(self.source_graph_ids, cc)) - assert ~np.any(np.in1d(self.sink_graph_ids, cc)) + if np.any(np.isin(self.source_graph_ids, cc)): + assert np.all(np.isin(self.source_graph_ids, cc)) + assert ~np.any(np.isin(self.sink_graph_ids, cc)) if ( len(self.source_path_vertices) == len(cc) and self.disallow_isolating_cut @@ -565,9 +565,9 @@ def _sink_and_source_connectivity_sanity_check(self, cut_edge_set): if not self.partition_edges_within_label(cc): raise IsolatingCutException("Source") - if np.any(np.in1d(self.sink_graph_ids, cc)): - assert np.all(np.in1d(self.sink_graph_ids, cc)) - assert ~np.any(np.in1d(self.source_graph_ids, cc)) + if np.any(np.isin(self.sink_graph_ids, cc)): + assert np.all(np.isin(self.sink_graph_ids, cc)) + assert ~np.any(np.isin(self.source_graph_ids, cc)) if ( len(self.sink_path_vertices) == len(cc) and self.disallow_isolating_cut @@ -664,8 +664,8 @@ def run_split_preview( supervoxels = np.concatenate( [agg.supervoxels for agg in l2id_agglomeration_d.values()] ) - mask0 = np.in1d(edges.node_ids1, supervoxels) - mask1 = np.in1d(edges.node_ids2, supervoxels) + mask0 = np.isin(edges.node_ids1, supervoxels) + mask1 = np.isin(edges.node_ids2, supervoxels) edges = edges[mask0 & mask1] edges_to_remove, illegal_split = run_multicut( edges, diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py index c09f54a3d..a587b3ef2 100644 --- a/pychunkedgraph/graph/edits.py +++ b/pychunkedgraph/graph/edits.py @@ -78,7 +78,7 @@ def _get_relevant_components(edges: np.ndarray, supervoxels: np.ndarray) -> Tupl # when merging, there must be only two components for cc_idx in ccs: cc = graph_ids[cc_idx] - if np.any(np.in1d(supervoxels, cc)): + if np.any(np.isin(supervoxels, cc)): relevant_ccs.append(cc) assert len(relevant_ccs) == 2, "must be 2 components" return relevant_ccs @@ -115,13 +115,13 @@ def merge_preprocess( inactive = np.concatenate(inactive_edges).astype(basetypes.NODE_ID) _inactive = [types.empty_2d] # source to sink edges - source_mask = np.in1d(inactive[:, 0], relevant_ccs[0]) - sink_mask = np.in1d(inactive[:, 1], relevant_ccs[1]) + source_mask = np.isin(inactive[:, 0], relevant_ccs[0]) + sink_mask = np.isin(inactive[:, 1], relevant_ccs[1]) _inactive.append(inactive[source_mask & sink_mask]) # sink to source edges - sink_mask = np.in1d(inactive[:, 1], relevant_ccs[0]) - source_mask = np.in1d(inactive[:, 0], relevant_ccs[1]) + sink_mask = np.isin(inactive[:, 1], relevant_ccs[0]) + source_mask = np.isin(inactive[:, 0], relevant_ccs[1]) _inactive.append(inactive[source_mask & sink_mask]) _inactive = np.concatenate(_inactive).astype(basetypes.NODE_ID) return np.unique(_inactive, axis=0) if _inactive.size else types.empty_2d @@ -291,7 +291,7 @@ def _split_l2_agglomeration( active_mask = neighbor_roots == root cross_edges = cross_edges[active_mask] cross_edges = cross_edges[~in2d(cross_edges, removed_edges)] - isolated_ids = agg.supervoxels[~np.in1d(agg.supervoxels, chunk_edges)] + isolated_ids = agg.supervoxels[~np.isin(agg.supervoxels, chunk_edges)] isolated_edges = np.column_stack((isolated_ids, isolated_ids)) _edges = np.concatenate([chunk_edges, isolated_edges]).astype(basetypes.NODE_ID) graph, _, _, graph_ids = flatgraph.build_gt_graph(_edges, make_directed=True) @@ -305,7 +305,7 @@ def _filter_component_cross_edges( Filters cross 
edges for a connected component `cc_ids` from `cross_edges` of the complete chunk. """ - mask = np.in1d(cross_edges[:, 0], component_ids) + mask = np.isin(cross_edges[:, 0], component_ids) cross_edges_ = cross_edges[mask] cross_edge_layers_ = cross_edge_layers[mask] edges_d = {} @@ -539,7 +539,7 @@ def _update_id_lineage( parent_layer: int, ): # update newly created children; mask others - mask = np.in1d(children, self._new_ids_d[layer]) + mask = np.isin(children, self._new_ids_d[layer]) for child_id in children[mask]: child_old_ids = self._new_old_id_d[child_id] for id_ in child_old_ids: @@ -574,7 +574,7 @@ def _get_layer_node_ids( old_parents = self.cg.get_parents(old_ids, time_stamp=self._last_successful_ts) siblings = self.cg.get_children(np.unique(old_parents), flatten=True) # replace old identities with new IDs - mask = np.in1d(siblings, old_ids) + mask = np.isin(siblings, old_ids) node_ids = np.concatenate( [_get_flipped_ids(self._old_new_id_d, old_ids), siblings[~mask], new_ids] ).astype(basetypes.NODE_ID) diff --git a/pychunkedgraph/graph/misc.py b/pychunkedgraph/graph/misc.py index 0f53c71c3..38bc31508 100644 --- a/pychunkedgraph/graph/misc.py +++ b/pychunkedgraph/graph/misc.py @@ -142,7 +142,7 @@ def get_contact_sites( ) # Build area lookup dictionary - cs_svs = edges[~np.in1d(edges, sv_ids).reshape(-1, 2)] + cs_svs = edges[~np.isin(edges, sv_ids).reshape(-1, 2)] area_dict = collections.defaultdict(int) for area, sv_id in zip(areas, cs_svs): @@ -165,7 +165,7 @@ def get_contact_sites( cs_dict = collections.defaultdict(list) for cc in ccs: cc_sv_ids = unique_ids[cc] - cc_sv_ids = cc_sv_ids[np.in1d(cc_sv_ids, u_cs_svs)] + cc_sv_ids = cc_sv_ids[np.isin(cc_sv_ids, u_cs_svs)] cs_areas = area_dict_vec(cc_sv_ids) partner_root_id = ( int(cg.get_root(cc_sv_ids[0], time_stamp=time_stamp)) diff --git a/pychunkedgraph/graph/operation.py b/pychunkedgraph/graph/operation.py index a07045cff..a13fd403a 100644 --- a/pychunkedgraph/graph/operation.py +++ b/pychunkedgraph/graph/operation.py @@ -864,7 +864,7 @@ def __init__( self.bbox_offset = np.atleast_1d(bbox_offset).astype(basetypes.COORDINATES) self.path_augment = path_augment self.disallow_isolating_cut = disallow_isolating_cut - if np.any(np.in1d(self.sink_ids, self.source_ids)): + if np.any(np.isin(self.sink_ids, self.source_ids)): raise PreconditionError( "Supervoxels exist in both sink and source, " "try placing the points further apart." 
@@ -917,8 +917,8 @@ def _apply( supervoxels = np.concatenate( [agg.supervoxels for agg in l2id_agglomeration_d.values()] ).astype(basetypes.NODE_ID) - mask0 = np.in1d(edges.node_ids1, supervoxels) - mask1 = np.in1d(edges.node_ids2, supervoxels) + mask0 = np.isin(edges.node_ids1, supervoxels) + mask1 = np.isin(edges.node_ids2, supervoxels) edges = edges[mask0 & mask1] if len(edges) == 0: raise PreconditionError("No local edges found.") diff --git a/pychunkedgraph/graph/utils/id_helpers.py b/pychunkedgraph/graph/utils/id_helpers.py index aa486ac84..2a245f79c 100644 --- a/pychunkedgraph/graph/utils/id_helpers.py +++ b/pychunkedgraph/graph/utils/id_helpers.py @@ -89,7 +89,7 @@ def get_atomic_id_from_coord( # sort by frequency and discard those ids that have been checked # previously sorted_atomic_ids = atomic_ids[np.argsort(atomic_id_count)] - sorted_atomic_ids = sorted_atomic_ids[~np.in1d(sorted_atomic_ids, checked)] + sorted_atomic_ids = sorted_atomic_ids[~np.isin(sorted_atomic_ids, checked)] # For each candidate id check whether its root id corresponds to the # given root id diff --git a/pychunkedgraph/meshing/manifest/utils.py b/pychunkedgraph/meshing/manifest/utils.py index 67e600653..90963570c 100644 --- a/pychunkedgraph/meshing/manifest/utils.py +++ b/pychunkedgraph/meshing/manifest/utils.py @@ -40,7 +40,7 @@ def _get_children(cg, node_ids: Sequence[np.uint64], children_cache: Dict): if len(node_ids) == 0: return empty_1d.copy() node_ids = np.array(node_ids, dtype=NODE_ID) - mask = np.in1d(node_ids, np.fromiter(children_cache.keys(), dtype=NODE_ID)) + mask = np.isin(node_ids, np.fromiter(children_cache.keys(), dtype=NODE_ID)) children_d = cg.get_children(node_ids[~mask]) children_cache.update(children_d) diff --git a/pychunkedgraph/meshing/meshgen.py b/pychunkedgraph/meshing/meshgen.py index 1fd4cf5b4..c7ab81903 100644 --- a/pychunkedgraph/meshing/meshgen.py +++ b/pychunkedgraph/meshing/meshgen.py @@ -75,7 +75,7 @@ def remap_seg_using_unsafe_dict(seg, unsafe_dict): overlaps.extend(np.unique(seg[:, :, -2][bin_cc_seg[:, :, -1]])) overlaps = np.unique(overlaps) - linked_l2_ids = overlaps[np.in1d(overlaps, unsafe_dict[unsafe_root_id])] + linked_l2_ids = overlaps[np.isin(overlaps, unsafe_dict[unsafe_root_id])] if len(linked_l2_ids) == 0: seg[bin_cc_seg] = 0 @@ -357,7 +357,7 @@ def get_lx_overlapping_remappings(cg, chunk_id, time_stamp=None, n_threads=1): ) safe_lx_ids = lx_ids[u_idx[c_root_ids == 1]] - unsafe_lx_ids = lx_ids[~np.in1d(lx_ids, safe_lx_ids)] + unsafe_lx_ids = lx_ids[~np.isin(lx_ids, safe_lx_ids)] unsafe_root_ids = np.unique(root_ids[u_idx[c_root_ids != 1]]) lx_root_dict = dict(zip(neigh_lx_ids, neigh_root_ids)) @@ -387,7 +387,7 @@ def get_lx_overlapping_remappings(cg, chunk_id, time_stamp=None, n_threads=1): unsafe_dict = collections.defaultdict(list) for root_id in unsafe_root_ids: - if np.sum(~np.in1d(root_lx_dict[root_id], unsafe_lx_ids)) == 0: + if np.sum(~np.isin(root_lx_dict[root_id], unsafe_lx_ids)) == 0: continue for neigh_lx_id in root_lx_dict[root_id]: diff --git a/pychunkedgraph/utils/general.py b/pychunkedgraph/utils/general.py index ac4929660..8913025c7 100644 --- a/pychunkedgraph/utils/general.py +++ b/pychunkedgraph/utils/general.py @@ -40,4 +40,4 @@ def chunked(l: Sequence, n: int): def in2d(arr1: np.ndarray, arr2: np.ndarray) -> np.ndarray: arr1_view = arr1.view(dtype="u8,u8").reshape(arr1.shape[0]) arr2_view = arr2.view(dtype="u8,u8").reshape(arr2.shape[0]) - return np.in1d(arr1_view, arr2_view) + return np.isin(arr1_view, arr2_view) From 
9937e3ae00f8048bf82e4e1806096ae9331d5d05 Mon Sep 17 00:00:00 2001 From: Akhilesh Halageri Date: Sat, 24 Jan 2026 03:06:09 +0000 Subject: [PATCH 09/14] feat(stitching): don't use locks if stitch_mode True --- pychunkedgraph/graph/locks.py | 6 +++--- pychunkedgraph/graph/operation.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pychunkedgraph/graph/locks.py b/pychunkedgraph/graph/locks.py index e3918f0ea..40231c21c 100644 --- a/pychunkedgraph/graph/locks.py +++ b/pychunkedgraph/graph/locks.py @@ -49,12 +49,12 @@ def __init__( self.privileged_mode = privileged_mode def __enter__(self): + if not self.operation_id: + self.operation_id = self.cg.id_client.create_operation_id() + if self.privileged_mode: - assert self.operation_id is not None, "Please provide operation ID." warn("Warning: Privileged mode without acquiring lock.") return self - if not self.operation_id: - self.operation_id = self.cg.id_client.create_operation_id() nodes_ts = self.cg.get_node_timestamps(self.root_ids, return_numpy=0) min_ts = min(nodes_ts) diff --git a/pychunkedgraph/graph/operation.py b/pychunkedgraph/graph/operation.py index a13fd403a..304597782 100644 --- a/pychunkedgraph/graph/operation.py +++ b/pychunkedgraph/graph/operation.py @@ -416,6 +416,7 @@ def execute( op_type = "merge" if is_merge else "split" self.parent_ts = parent_ts root_ids = self._update_root_ids() + self.privileged_mode = self.privileged_mode or (is_merge and self.stitch_mode) with locks.RootLock( self.cg, root_ids, From 784c1d8e386159c0f4395715001e65a28cf85810 Mon Sep 17 00:00:00 2001 From: Dodam Ih Date: Fri, 23 Jan 2026 23:14:46 -0800 Subject: [PATCH 10/14] hotfix: exponential range search for root ID assignment --- pychunkedgraph/graph/edits.py | 81 +++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 32 deletions(-) diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py index a587b3ef2..69df76421 100644 --- a/pychunkedgraph/graph/edits.py +++ b/pychunkedgraph/graph/edits.py @@ -10,7 +10,6 @@ import fastremap import numpy as np -import fastremap from . import types from . 
import attributes @@ -196,6 +195,7 @@ def add_edges( edges, l2_cross_edges_d = _analyze_affected_edges( cg, atomic_edges, parent_ts=parent_ts ) + l2ids = np.unique(edges) if not allow_same_segment_merge and not stitch_mode: roots = cg.get_roots(l2ids, assert_roots=True, time_stamp=parent_ts) @@ -203,6 +203,7 @@ def add_edges( new_old_id_d = defaultdict(set) old_new_id_d = defaultdict(set) + old_hierarchy_d = _init_old_hierarchy(cg, l2ids, parent_ts=parent_ts) atomic_children_d = cg.get_children(l2ids) cross_edges_d = merge_cross_edge_dicts( @@ -211,6 +212,7 @@ def add_edges( graph, _, _, graph_ids = flatgraph.build_gt_graph(edges, make_directed=True) components = flatgraph.connected_components(graph) + new_l2_ids = [] for cc_indices in components: l2ids_ = graph_ids[cc_indices] @@ -257,6 +259,7 @@ def add_edges( new_roots = create_parents.run() sanity_check(cg, new_roots, operation_id) create_parents.create_new_entries() + return new_roots, new_l2_ids, create_parents.new_entries @@ -484,6 +487,7 @@ def _update_neighbor_cross_edges( """ updated_counterparts = {} newid_cx_edges_d = cg.get_cross_chunk_edges(new_ids, time_stamp=parent_ts) + node_map = {} for k, v in old_new_id.items(): if len(v) == 1: @@ -497,11 +501,13 @@ def _update_neighbor_cross_edges( cg, new_id, cx_edges_d, node_map, parent_ts=parent_ts ) updated_counterparts.update(result) + updated_entries = [] for node, val_dict in updated_counterparts.items(): rowkey = serialize_uint64(node) row = cg.client.mutate_row(rowkey, val_dict, time_stamp=time_stamp) updated_entries.append(row) + return updated_entries @@ -548,14 +554,9 @@ def _update_id_lineage( self._old_new_id_d[old_id].add(parent) def _get_connected_components(self, node_ids: np.ndarray, layer: int): - with TimeIt( - f"get_cross_chunk_edges.{layer}", - self.cg.graph_id, - self._operation_id, - ): - cross_edges_d = self.cg.get_cross_chunk_edges( - node_ids, time_stamp=self._last_successful_ts - ) + cross_edges_d = self.cg.get_cross_chunk_edges( + node_ids, time_stamp=self._last_successful_ts + ) cx_edges = [types.empty_2d] for id_ in node_ids: @@ -573,6 +574,7 @@ def _get_layer_node_ids( # get their parents, then children of those parents old_parents = self.cg.get_parents(old_ids, time_stamp=self._last_successful_ts) siblings = self.cg.get_children(np.unique(old_parents), flatten=True) + # replace old identities with new IDs mask = np.isin(siblings, old_ids) node_ids = np.concatenate( @@ -592,19 +594,23 @@ def _update_cross_edge_cache(self, parent, children): if parent_layer == 2: # l2 cross edges have already been updated by this point return + cx_edges_d = self.cg.get_cross_chunk_edges( children, time_stamp=self._last_successful_ts ) cx_edges_d = concatenate_cross_edge_dicts(cx_edges_d.values()) + cx_edges_d, edge_nodes = get_latest_edges_wrapper( self.cg, cx_edges_d, parent_ts=self._last_successful_ts ) + edge_parents = self.cg.get_roots( edge_nodes, stop_layer=parent_layer, ceil=False, time_stamp=self._last_successful_ts, ) + edge_parents_d = dict(zip(edge_nodes, edge_parents)) new_cx_edges_d = {} for layer in range(parent_layer, self.cg.meta.layer_count): @@ -631,7 +637,8 @@ def _create_new_parents(self, layer: int): new_ids = self._new_ids_d[layer] layer_node_ids = self._get_layer_node_ids(new_ids, layer) components, graph_ids = self._get_connected_components(layer_node_ids, layer) - for cc_indices in components: + + for cc_idx, cc_indices in enumerate(components): parent_layer = layer + 1 # must be reset for each connected component cc_ids = graph_ids[cc_indices] if 
len(cc_ids) == 1: @@ -645,14 +652,21 @@ def _create_new_parents(self, layer: int): parent_layer = l break - while True: - parent = self.cg.id_client.create_node_id( - self.cg.get_parent_chunk_id(cc_ids[0], parent_layer), - root_chunk=parent_layer == self.cg.meta.layer_count, + chunk_id = self.cg.get_parent_chunk_id(cc_ids[0], parent_layer) + is_root = parent_layer == self.cg.meta.layer_count + batch_size = 4096 + parent = None + while parent is None: + candidate_ids = self.cg.id_client.create_node_ids( + chunk_id, batch_size, root_chunk=is_root ) - _entry = self.cg.client.read_node(parent) - if _entry == {}: - break + existing = self.cg.client.read_nodes(node_ids=candidate_ids) + for cid in candidate_ids: + if cid not in existing: + parent = cid + break + if parent is None: + batch_size *= 2 self._new_ids_d[parent_layer].append(parent) self._update_id_lineage(parent, cc_ids, layer, parent_layer) @@ -686,21 +700,22 @@ def run(self) -> Iterable: continue # all new IDs in this layer have been created # update their cross chunk edges and their neighbors' - m = f"create_new_parents_layer.{layer}" - with TimeIt(m, self.cg.graph_id, self._operation_id): - for new_id in self._new_ids_d[layer]: - children = self.cg.get_children(new_id) - self._update_cross_edge_cache(new_id, children) - entries = _update_neighbor_cross_edges( - self.cg, - self._new_ids_d[layer], - self._new_old_id_d, - self._old_new_id_d, - time_stamp=self._time_stamp, - parent_ts=self._last_successful_ts, - ) - self.new_entries.extend(entries) - self._create_new_parents(layer) + for new_id in self._new_ids_d[layer]: + children = self.cg.get_children(new_id) + self._update_cross_edge_cache(new_id, children) + + entries = _update_neighbor_cross_edges( + self.cg, + self._new_ids_d[layer], + self._new_old_id_d, + self._old_new_id_d, + time_stamp=self._time_stamp, + parent_ts=self._last_successful_ts, + ) + self.new_entries.extend(entries) + + self._create_new_parents(layer) + return self._new_ids_d[self.cg.meta.layer_count] def _update_root_id_lineage(self): @@ -756,6 +771,7 @@ def _get_cross_edges_val_dicts(self): def create_new_entries(self) -> List: val_dicts = self._get_cross_edges_val_dicts() + for layer in range(2, self.cg.meta.layer_count + 1): new_ids = self._new_ids_d[layer] for id_ in new_ids: @@ -781,4 +797,5 @@ def create_new_entries(self) -> List: time_stamp=self._time_stamp, ) ) + self._update_root_id_lineage() From 278f3c0869bbbe007bb2b0825047fb9a0361dccf Mon Sep 17 00:00:00 2001 From: Dodam Ih Date: Fri, 23 Jan 2026 23:55:59 -0800 Subject: [PATCH 11/14] hotfix: explicit uint64 typing to avoid float64 rounding --- pychunkedgraph/graph/edges/utils.py | 4 +++- pychunkedgraph/graph/segmenthistory.py | 2 +- pychunkedgraph/meshing/meshgen.py | 6 +++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pychunkedgraph/graph/edges/utils.py b/pychunkedgraph/graph/edges/utils.py index b49a9a547..a7bad1463 100644 --- a/pychunkedgraph/graph/edges/utils.py +++ b/pychunkedgraph/graph/edges/utils.py @@ -69,7 +69,9 @@ def merge_cross_edge_dicts(x_edges_d1: Dict, x_edges_d2: Dict) -> Dict: Combines two cross chunk dictionaries of form {node_id: {layer id : edge list}}. 
""" - node_ids = np.unique(list(x_edges_d1.keys()) + list(x_edges_d2.keys())) + node_ids = np.unique( + np.array(list(x_edges_d1.keys()) + list(x_edges_d2.keys()), dtype=basetypes.NODE_ID) + ) result_d = {} for node_id in node_ids: cross_edge_ds = [x_edges_d1.get(node_id, {}), x_edges_d2.get(node_id, {})] diff --git a/pychunkedgraph/graph/segmenthistory.py b/pychunkedgraph/graph/segmenthistory.py index 30f42d15b..0a215cf92 100644 --- a/pychunkedgraph/graph/segmenthistory.py +++ b/pychunkedgraph/graph/segmenthistory.py @@ -78,7 +78,7 @@ def operation_id_root_id_dict(self): @property def operation_ids(self): - return np.array(list(self.operation_id_root_id_dict.keys())) + return np.array(list(self.operation_id_root_id_dict.keys()), dtype=basetypes.OPERATION_ID) @property def _log_rows(self): diff --git a/pychunkedgraph/meshing/meshgen.py b/pychunkedgraph/meshing/meshgen.py index c7ab81903..d137d52ad 100644 --- a/pychunkedgraph/meshing/meshgen.py +++ b/pychunkedgraph/meshing/meshgen.py @@ -253,7 +253,7 @@ def _get_root_ids(args): lx_id_remap = get_higher_to_lower_remapping(cg, chunk_id, time_stamp=time_stamp) - lx_ids = np.array(list(lx_id_remap.keys())) + lx_ids = np.array(list(lx_id_remap.keys()), dtype=np.uint64) root_ids = np.zeros(len(lx_ids), dtype=np.uint64) n_jobs = np.min([n_threads, len(lx_ids)]) @@ -1034,8 +1034,8 @@ def get_multi_child_nodes(cg, chunk_id, node_id_subset=None, chunk_bbox_string=F node_ids=node_id_subset, properties=attributes.Hierarchy.Child ) - node_ids = np.array(list(range_read.keys())) - node_rows = np.array(list(range_read.values())) + node_ids = np.array(list(range_read.keys()), dtype=np.uint64) + node_rows = np.array(list(range_read.values()), dtype=object) child_fragments = np.array( [ fragment.value From be4bb37acfd8805c8eef8cb250771edfad23735c Mon Sep 17 00:00:00 2001 From: Dodam Ih Date: Sat, 24 Jan 2026 01:26:12 -0800 Subject: [PATCH 12/14] temp: profiling --- pychunkedgraph/graph/edits.py | 512 ++++++++++++++++++++---------- pychunkedgraph/graph/operation.py | 59 ++-- 2 files changed, 375 insertions(+), 196 deletions(-) diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py index 69df76421..87718dd26 100644 --- a/pychunkedgraph/graph/edits.py +++ b/pychunkedgraph/graph/edits.py @@ -1,12 +1,15 @@ # pylint: disable=invalid-name, missing-docstring, too-many-locals, c-extension-no-member import datetime +import time +import os from typing import Dict from typing import List from typing import Tuple from typing import Iterable from typing import Set from collections import defaultdict +from contextlib import contextmanager import fastremap import numpy as np @@ -25,13 +28,126 @@ from ..debug.utils import sanity_check, sanity_check_single +class HierarchicalProfiler: + """ + Hierarchical profiler for detailed timing breakdowns. + Tracks timing at multiple levels and prints a breakdown at the end. 
+ """ + + def __init__(self, enabled: bool = True): + self.enabled = enabled + self.timings: Dict[str, List[float]] = defaultdict(list) + self.call_counts: Dict[str, int] = defaultdict(int) + self.stack: List[Tuple[str, float]] = [] + self.current_path: List[str] = [] + + @contextmanager + def profile(self, name: str): + """Context manager for profiling a code block.""" + if not self.enabled: + yield + return + + full_path = ".".join(self.current_path + [name]) + self.current_path.append(name) + start_time = time.perf_counter() + + try: + yield + finally: + elapsed = time.perf_counter() - start_time + self.timings[full_path].append(elapsed) + self.call_counts[full_path] += 1 + self.current_path.pop() + + def print_report(self, operation_id=None): + """Print a detailed timing breakdown.""" + if not self.enabled or not self.timings: + return + + print("\n" + "=" * 80) + print(f"PROFILER REPORT{f' (operation_id={operation_id})' if operation_id else ''}") + print("=" * 80) + + # Group by depth level + by_depth: Dict[int, List[Tuple[str, float, int]]] = defaultdict(list) + for path, times in self.timings.items(): + depth = path.count(".") + total_time = sum(times) + count = self.call_counts[path] + by_depth[depth].append((path, total_time, count)) + + # Sort each level by total time + for depth in sorted(by_depth.keys()): + items = sorted(by_depth[depth], key=lambda x: -x[1]) + for path, total_time, count in items: + indent = " " * depth + avg_time = total_time / count if count > 0 else 0 + if count > 1: + print( + f"{indent}{path}: {total_time*1000:.2f}ms total " + f"({count} calls, {avg_time*1000:.2f}ms avg)" + ) + else: + print(f"{indent}{path}: {total_time*1000:.2f}ms") + + # Print summary + print("-" * 80) + top_level_total = sum( + sum(times) for path, times in self.timings.items() if "." not in path + ) + print(f"Total top-level time: {top_level_total*1000:.2f}ms") + + # Print top 10 slowest operations + print("\nTop 10 slowest operations:") + all_ops = [ + (path, sum(times), self.call_counts[path]) + for path, times in self.timings.items() + ] + all_ops.sort(key=lambda x: -x[1]) + for i, (path, total_time, count) in enumerate(all_ops[:10]): + pct = (total_time / top_level_total * 100) if top_level_total > 0 else 0 + print(f" {i+1}. 
{path}: {total_time*1000:.2f}ms ({pct:.1f}%)") + + print("=" * 80 + "\n") + + def reset(self): + """Reset all timing data.""" + self.timings.clear() + self.call_counts.clear() + self.stack.clear() + self.current_path.clear() + + +# Global profiler instance - enable via environment variable +PROFILER_ENABLED = os.environ.get("PCG_PROFILER_ENABLED", "1") == "1" +_profiler: HierarchicalProfiler = None + + +def get_profiler() -> HierarchicalProfiler: + """Get or create the global profiler instance.""" + global _profiler + if _profiler is None: + _profiler = HierarchicalProfiler(enabled=PROFILER_ENABLED) + return _profiler + + +def reset_profiler(): + """Reset the global profiler.""" + global _profiler + if _profiler is not None: + _profiler.reset() + + def _init_old_hierarchy(cg, l2ids: np.ndarray, parent_ts: datetime.datetime = None): + profiler = get_profiler() old_hierarchy_d = {id_: {2: id_} for id_ in l2ids} - for id_ in l2ids: - layer_parent_d = cg.get_all_parents_dict(id_, time_stamp=parent_ts) - old_hierarchy_d[id_].update(layer_parent_d) - for parent in layer_parent_d.values(): - old_hierarchy_d[parent] = old_hierarchy_d[id_] + with profiler.profile("init_hierarchy_get_parents"): + for id_ in l2ids: + layer_parent_d = cg.get_all_parents_dict(id_, time_stamp=parent_ts) + old_hierarchy_d[id_].update(layer_parent_d) + for parent in layer_parent_d.values(): + old_hierarchy_d[parent] = old_hierarchy_d[id_] return old_hierarchy_d @@ -43,8 +159,11 @@ def _analyze_affected_edges( Also returns new cross edges dicts for nodes crossing chunk boundary. """ + profiler = get_profiler() + supervoxels = np.unique(atomic_edges) - parents = cg.get_parents(supervoxels, time_stamp=parent_ts) + with profiler.profile("analyze_get_parents"): + parents = cg.get_parents(supervoxels, time_stamp=parent_ts) sv_parent_d = dict(zip(supervoxels.tolist(), parents)) edge_layers = cg.get_cross_chunk_edges_layer(atomic_edges) parent_edges = [ @@ -192,74 +311,95 @@ def add_edges( allow_same_segment_merge=False, stitch_mode: bool = False, ): - edges, l2_cross_edges_d = _analyze_affected_edges( - cg, atomic_edges, parent_ts=parent_ts - ) + profiler = get_profiler() + profiler.reset() # Reset for fresh profiling - l2ids = np.unique(edges) - if not allow_same_segment_merge and not stitch_mode: - roots = cg.get_roots(l2ids, assert_roots=True, time_stamp=parent_ts) - assert np.unique(roots).size >= 2, "L2 IDs must belong to different roots." + with profiler.profile("add_edges"): + with profiler.profile("analyze_affected_edges"): + edges, l2_cross_edges_d = _analyze_affected_edges( + cg, atomic_edges, parent_ts=parent_ts + ) - new_old_id_d = defaultdict(set) - old_new_id_d = defaultdict(set) + l2ids = np.unique(edges) + if not allow_same_segment_merge and not stitch_mode: + with profiler.profile("validate_roots"): + roots = cg.get_roots(l2ids, assert_roots=True, time_stamp=parent_ts) + assert np.unique(roots).size >= 2, "L2 IDs must belong to different roots." 
- old_hierarchy_d = _init_old_hierarchy(cg, l2ids, parent_ts=parent_ts) - atomic_children_d = cg.get_children(l2ids) - cross_edges_d = merge_cross_edge_dicts( - cg.get_cross_chunk_edges(l2ids, time_stamp=parent_ts), l2_cross_edges_d - ) + new_old_id_d = defaultdict(set) + old_new_id_d = defaultdict(set) - graph, _, _, graph_ids = flatgraph.build_gt_graph(edges, make_directed=True) - components = flatgraph.connected_components(graph) + with profiler.profile("init_old_hierarchy"): + old_hierarchy_d = _init_old_hierarchy(cg, l2ids, parent_ts=parent_ts) - new_l2_ids = [] - for cc_indices in components: - l2ids_ = graph_ids[cc_indices] - new_id = cg.id_client.create_node_id(cg.get_chunk_id(l2ids_[0])) - new_l2_ids.append(new_id) - new_old_id_d[new_id].update(l2ids_) - for id_ in l2ids_: - old_new_id_d[id_].add(new_id) + with profiler.profile("get_children"): + atomic_children_d = cg.get_children(l2ids) - # update cache - # map parent to new merged children and vice versa - merged_children = np.concatenate( - [atomic_children_d[l2id] for l2id in l2ids_] - ).astype(basetypes.NODE_ID) - cg.cache.children_cache[new_id] = merged_children - cache_utils.update(cg.cache.parents_cache, merged_children, new_id) + with profiler.profile("get_cross_chunk_edges"): + cross_edges_d = merge_cross_edge_dicts( + cg.get_cross_chunk_edges(l2ids, time_stamp=parent_ts), l2_cross_edges_d + ) - # update cross chunk edges by replacing old_ids with new - # this can be done only after all new IDs have been created - for new_id, cc_indices in zip(new_l2_ids, components): - l2ids_ = graph_ids[cc_indices] - new_cx_edges_d = {} - cx_edges = [cross_edges_d[l2id] for l2id in l2ids_] - cx_edges_d = concatenate_cross_edge_dicts(cx_edges, unique=True) - temp_map = {k: next(iter(v)) for k, v in old_new_id_d.items()} - for layer, edges in cx_edges_d.items(): - edges = fastremap.remap(edges, temp_map, preserve_missing_labels=True) - new_cx_edges_d[layer] = edges - assert np.all(edges[:, 0] == new_id) - cg.cache.cross_chunk_edges_cache[new_id] = new_cx_edges_d + with profiler.profile("build_graph"): + graph, _, _, graph_ids = flatgraph.build_gt_graph(edges, make_directed=True) + components = flatgraph.connected_components(graph) + + with profiler.profile("create_l2_ids"): + new_l2_ids = [] + for cc_indices in components: + l2ids_ = graph_ids[cc_indices] + new_id = cg.id_client.create_node_id(cg.get_chunk_id(l2ids_[0])) + new_l2_ids.append(new_id) + new_old_id_d[new_id].update(l2ids_) + for id_ in l2ids_: + old_new_id_d[id_].add(new_id) + + # update cache + # map parent to new merged children and vice versa + merged_children = np.concatenate( + [atomic_children_d[l2id] for l2id in l2ids_] + ).astype(basetypes.NODE_ID) + cg.cache.children_cache[new_id] = merged_children + cache_utils.update(cg.cache.parents_cache, merged_children, new_id) + + # update cross chunk edges by replacing old_ids with new + # this can be done only after all new IDs have been created + with profiler.profile("update_cross_edges"): + for new_id, cc_indices in zip(new_l2_ids, components): + l2ids_ = graph_ids[cc_indices] + new_cx_edges_d = {} + cx_edges = [cross_edges_d[l2id] for l2id in l2ids_] + cx_edges_d = concatenate_cross_edge_dicts(cx_edges, unique=True) + temp_map = {k: next(iter(v)) for k, v in old_new_id_d.items()} + for layer, edges in cx_edges_d.items(): + edges = fastremap.remap(edges, temp_map, preserve_missing_labels=True) + new_cx_edges_d[layer] = edges + assert np.all(edges[:, 0] == new_id) + cg.cache.cross_chunk_edges_cache[new_id] = 
new_cx_edges_d + + create_parents = CreateParentNodes( + cg, + new_l2_ids=new_l2_ids, + old_hierarchy_d=old_hierarchy_d, + new_old_id_d=new_old_id_d, + old_new_id_d=old_new_id_d, + operation_id=operation_id, + time_stamp=time_stamp, + parent_ts=parent_ts, + stitch_mode=stitch_mode, + profiler=profiler, + ) - create_parents = CreateParentNodes( - cg, - new_l2_ids=new_l2_ids, - old_hierarchy_d=old_hierarchy_d, - new_old_id_d=new_old_id_d, - old_new_id_d=old_new_id_d, - operation_id=operation_id, - time_stamp=time_stamp, - parent_ts=parent_ts, - stitch_mode=stitch_mode, - ) + with profiler.profile("create_parent_nodes_run"): + new_roots = create_parents.run() - new_roots = create_parents.run() - sanity_check(cg, new_roots, operation_id) - create_parents.create_new_entries() + with profiler.profile("sanity_check"): + sanity_check(cg, new_roots, operation_id) + + with profiler.profile("create_new_entries"): + create_parents.create_new_entries() + profiler.print_report(operation_id) return new_roots, new_l2_ids, create_parents.new_entries @@ -485,28 +625,33 @@ def _update_neighbor_cross_edges( and then write to storage to consolidate the mutations. Returns mutations to updated counterparts/partner nodes. """ + profiler = get_profiler() updated_counterparts = {} - newid_cx_edges_d = cg.get_cross_chunk_edges(new_ids, time_stamp=parent_ts) + + with profiler.profile("neighbor_get_cross_chunk_edges"): + newid_cx_edges_d = cg.get_cross_chunk_edges(new_ids, time_stamp=parent_ts) node_map = {} for k, v in old_new_id.items(): if len(v) == 1: node_map[k] = next(iter(v)) - for new_id in new_ids: - cx_edges_d = newid_cx_edges_d[new_id] - m = {old_id: new_id for old_id in _get_flipped_ids(new_old_id, [new_id])} - node_map.update(m) - result = _update_neighbor_cross_edges_single( - cg, new_id, cx_edges_d, node_map, parent_ts=parent_ts - ) - updated_counterparts.update(result) + with profiler.profile("neighbor_update_loop"): + for new_id in new_ids: + cx_edges_d = newid_cx_edges_d[new_id] + m = {old_id: new_id for old_id in _get_flipped_ids(new_old_id, [new_id])} + node_map.update(m) + result = _update_neighbor_cross_edges_single( + cg, new_id, cx_edges_d, node_map, parent_ts=parent_ts + ) + updated_counterparts.update(result) - updated_entries = [] - for node, val_dict in updated_counterparts.items(): - rowkey = serialize_uint64(node) - row = cg.client.mutate_row(rowkey, val_dict, time_stamp=time_stamp) - updated_entries.append(row) + with profiler.profile("neighbor_create_mutations"): + updated_entries = [] + for node, val_dict in updated_counterparts.items(): + rowkey = serialize_uint64(node) + row = cg.client.mutate_row(rowkey, val_dict, time_stamp=time_stamp) + updated_entries.append(row) return updated_entries @@ -524,6 +669,7 @@ def __init__( old_hierarchy_d: Dict[np.uint64, Dict[int, np.uint64]] = None, parent_ts: datetime.datetime = None, stitch_mode: bool = False, + profiler: HierarchicalProfiler = None, ): self.cg = cg self.new_entries = [] @@ -536,6 +682,7 @@ def __init__( self._time_stamp = time_stamp self._last_successful_ts = parent_ts self.stitch_mode = stitch_mode + self._profiler = profiler if profiler else get_profiler() def _update_id_lineage( self, @@ -554,17 +701,23 @@ def _update_id_lineage( self._old_new_id_d[old_id].add(parent) def _get_connected_components(self, node_ids: np.ndarray, layer: int): - cross_edges_d = self.cg.get_cross_chunk_edges( - node_ids, time_stamp=self._last_successful_ts - ) + with self._profiler.profile("cc_get_cross_chunk_edges"): + cross_edges_d = 
self.cg.get_cross_chunk_edges( + node_ids, time_stamp=self._last_successful_ts + ) cx_edges = [types.empty_2d] for id_ in node_ids: edges_ = cross_edges_d[id_].get(layer, types.empty_2d) cx_edges.append(edges_) cx_edges = np.concatenate([*cx_edges, np.vstack([node_ids, node_ids]).T]).astype(basetypes.NODE_ID) - graph, _, _, graph_ids = flatgraph.build_gt_graph(cx_edges, make_directed=True) - return flatgraph.connected_components(graph), graph_ids + + with self._profiler.profile("cc_build_graph"): + graph, _, _, graph_ids = flatgraph.build_gt_graph(cx_edges, make_directed=True) + + with self._profiler.profile("cc_find_components"): + components = flatgraph.connected_components(graph) + return components, graph_ids def _get_layer_node_ids( self, new_ids: np.ndarray, layer: int @@ -572,8 +725,11 @@ def _get_layer_node_ids( # get old identities of new IDs old_ids = _get_flipped_ids(self._new_old_id_d, new_ids) # get their parents, then children of those parents - old_parents = self.cg.get_parents(old_ids, time_stamp=self._last_successful_ts) - siblings = self.cg.get_children(np.unique(old_parents), flatten=True) + with self._profiler.profile("layer_node_ids_get_parents"): + old_parents = self.cg.get_parents(old_ids, time_stamp=self._last_successful_ts) + + with self._profiler.profile("layer_node_ids_get_children"): + siblings = self.cg.get_children(np.unique(old_parents), flatten=True) # replace old identities with new IDs mask = np.isin(siblings, old_ids) @@ -595,21 +751,24 @@ def _update_cross_edge_cache(self, parent, children): # l2 cross edges have already been updated by this point return - cx_edges_d = self.cg.get_cross_chunk_edges( - children, time_stamp=self._last_successful_ts - ) - cx_edges_d = concatenate_cross_edge_dicts(cx_edges_d.values()) + with self._profiler.profile("get_cross_chunk_edges_for_cache"): + cx_edges_d = self.cg.get_cross_chunk_edges( + children, time_stamp=self._last_successful_ts + ) + cx_edges_d = concatenate_cross_edge_dicts(cx_edges_d.values()) - cx_edges_d, edge_nodes = get_latest_edges_wrapper( - self.cg, cx_edges_d, parent_ts=self._last_successful_ts - ) + with self._profiler.profile("get_latest_edges_wrapper"): + cx_edges_d, edge_nodes = get_latest_edges_wrapper( + self.cg, cx_edges_d, parent_ts=self._last_successful_ts + ) - edge_parents = self.cg.get_roots( - edge_nodes, - stop_layer=parent_layer, - ceil=False, - time_stamp=self._last_successful_ts, - ) + with self._profiler.profile("get_roots_for_cross_edges"): + edge_parents = self.cg.get_roots( + edge_nodes, + stop_layer=parent_layer, + ceil=False, + time_stamp=self._last_successful_ts, + ) edge_parents_d = dict(zip(edge_nodes, edge_parents)) new_cx_edges_d = {} @@ -635,8 +794,12 @@ def _create_new_parents(self, layer: int): update parent old IDs """ new_ids = self._new_ids_d[layer] - layer_node_ids = self._get_layer_node_ids(new_ids, layer) - components, graph_ids = self._get_connected_components(layer_node_ids, layer) + + with self._profiler.profile("get_layer_node_ids"): + layer_node_ids = self._get_layer_node_ids(new_ids, layer) + + with self._profiler.profile("get_connected_components"): + components, graph_ids = self._get_connected_components(layer_node_ids, layer) for cc_idx, cc_indices in enumerate(components): parent_layer = layer + 1 # must be reset for each connected component @@ -644,50 +807,53 @@ def _create_new_parents(self, layer: int): if len(cc_ids) == 1: # skip connection parent_layer = self.cg.meta.layer_count - for l in range(layer + 1, self.cg.meta.layer_count): - cx_edges_d = 
self.cg.get_cross_chunk_edges( - [cc_ids[0]], time_stamp=self._last_successful_ts - ) - if len(cx_edges_d[cc_ids[0]].get(l, types.empty_2d)) > 0: - parent_layer = l - break + with self._profiler.profile("find_parent_layer"): + for l in range(layer + 1, self.cg.meta.layer_count): + cx_edges_d = self.cg.get_cross_chunk_edges( + [cc_ids[0]], time_stamp=self._last_successful_ts + ) + if len(cx_edges_d[cc_ids[0]].get(l, types.empty_2d)) > 0: + parent_layer = l + break chunk_id = self.cg.get_parent_chunk_id(cc_ids[0], parent_layer) is_root = parent_layer == self.cg.meta.layer_count batch_size = 4096 parent = None - while parent is None: - candidate_ids = self.cg.id_client.create_node_ids( - chunk_id, batch_size, root_chunk=is_root - ) - existing = self.cg.client.read_nodes(node_ids=candidate_ids) - for cid in candidate_ids: - if cid not in existing: - parent = cid - break - if parent is None: - batch_size *= 2 + with self._profiler.profile("create_and_verify_node_id"): + while parent is None: + candidate_ids = self.cg.id_client.create_node_ids( + chunk_id, batch_size, root_chunk=is_root + ) + existing = self.cg.client.read_nodes(node_ids=candidate_ids) + for cid in candidate_ids: + if cid not in existing: + parent = cid + break + if parent is None: + batch_size *= 2 self._new_ids_d[parent_layer].append(parent) self._update_id_lineage(parent, cc_ids, layer, parent_layer) self.cg.cache.children_cache[parent] = cc_ids cache_utils.update(self.cg.cache.parents_cache, cc_ids, parent) - try: - sanity_check_single(self.cg, parent, self._operation_id) - except AssertionError: - from pychunkedgraph.debug.utils import get_l2children - - pairs = [ - (a, b) for idx, a in enumerate(cc_ids) for b in cc_ids[idx + 1 :] - ] - for c1, c2 in pairs: - l2c1 = get_l2children(self.cg, c1) - l2c2 = get_l2children(self.cg, c2) - if np.intersect1d(l2c1, l2c2).size: - c = np.intersect1d(l2c1, l2c2) - msg = f"{self._operation_id}: {layer} {c1} {c2} have common children {c}" - raise ValueError(msg) + with self._profiler.profile("sanity_check_single"): + try: + sanity_check_single(self.cg, parent, self._operation_id) + except AssertionError: + from pychunkedgraph.debug.utils import get_l2children + + pairs = [ + (a, b) for idx, a in enumerate(cc_ids) for b in cc_ids[idx + 1 :] + ] + for c1, c2 in pairs: + l2c1 = get_l2children(self.cg, c1) + l2c2 = get_l2children(self.cg, c2) + if np.intersect1d(l2c1, l2c2).size: + c = np.intersect1d(l2c1, l2c2) + msg = f"{self._operation_id}: {layer} {c1} {c2} have common children {c}" + raise ValueError(msg) def run(self) -> Iterable: """ @@ -700,21 +866,24 @@ def run(self) -> Iterable: continue # all new IDs in this layer have been created # update their cross chunk edges and their neighbors' - for new_id in self._new_ids_d[layer]: - children = self.cg.get_children(new_id) - self._update_cross_edge_cache(new_id, children) - - entries = _update_neighbor_cross_edges( - self.cg, - self._new_ids_d[layer], - self._new_old_id_d, - self._old_new_id_d, - time_stamp=self._time_stamp, - parent_ts=self._last_successful_ts, - ) - self.new_entries.extend(entries) + with self._profiler.profile(f"layer_{layer}_update_cross_edge_cache"): + for new_id in self._new_ids_d[layer]: + children = self.cg.get_children(new_id) + self._update_cross_edge_cache(new_id, children) + + with self._profiler.profile(f"layer_{layer}_update_neighbor_cross_edges"): + entries = _update_neighbor_cross_edges( + self.cg, + self._new_ids_d[layer], + self._new_old_id_d, + self._old_new_id_d, + time_stamp=self._time_stamp, + 
parent_ts=self._last_successful_ts, + ) + self.new_entries.extend(entries) - self._create_new_parents(layer) + with self._profiler.profile(f"layer_{layer}_create_new_parents"): + self._create_new_parents(layer) return self._new_ids_d[self.cg.meta.layer_count] @@ -770,32 +939,35 @@ def _get_cross_edges_val_dicts(self): return val_dicts def create_new_entries(self) -> List: - val_dicts = self._get_cross_edges_val_dicts() - - for layer in range(2, self.cg.meta.layer_count + 1): - new_ids = self._new_ids_d[layer] - for id_ in new_ids: - val_dict = val_dicts.get(id_, {}) - children = self.cg.get_children(id_) - err = f"parent layer less than children; op {self._operation_id}" - assert np.max( - self.cg.get_chunk_layers(children) - ) < self.cg.get_chunk_layer(id_), err - val_dict[attributes.Hierarchy.Child] = children - self.new_entries.append( - self.cg.client.mutate_row( - serialize_uint64(id_), - val_dict, - time_stamp=self._time_stamp, - ) - ) - for child_id in children: + with self._profiler.profile("get_cross_edges_val_dicts"): + val_dicts = self._get_cross_edges_val_dicts() + + with self._profiler.profile("build_hierarchy_entries"): + for layer in range(2, self.cg.meta.layer_count + 1): + new_ids = self._new_ids_d[layer] + for id_ in new_ids: + val_dict = val_dicts.get(id_, {}) + children = self.cg.get_children(id_) + err = f"parent layer less than children; op {self._operation_id}" + assert np.max( + self.cg.get_chunk_layers(children) + ) < self.cg.get_chunk_layer(id_), err + val_dict[attributes.Hierarchy.Child] = children self.new_entries.append( self.cg.client.mutate_row( - serialize_uint64(child_id), - {attributes.Hierarchy.Parent: id_}, + serialize_uint64(id_), + val_dict, time_stamp=self._time_stamp, ) ) + for child_id in children: + self.new_entries.append( + self.cg.client.mutate_row( + serialize_uint64(child_id), + {attributes.Hierarchy.Parent: id_}, + time_stamp=self._time_stamp, + ) + ) - self._update_root_id_lineage() + with self._profiler.profile("update_root_id_lineage"): + self._update_root_id_lineage() diff --git a/pychunkedgraph/graph/operation.py b/pychunkedgraph/graph/operation.py index 304597782..090f3997a 100644 --- a/pychunkedgraph/graph/operation.py +++ b/pychunkedgraph/graph/operation.py @@ -22,6 +22,7 @@ from . import attributes from .edges import Edges from .edges.utils import get_edges_status +from .edits import get_profiler from .utils import basetypes from .utils import serializers from .cache import CacheService @@ -603,11 +604,14 @@ def _update_root_ids(self) -> np.ndarray: def _apply( self, *, operation_id, timestamp ) -> Tuple[np.ndarray, np.ndarray, List["bigtable.row.Row"]]: - root_ids = set( - self.cg.get_roots( - self.added_edges.ravel(), assert_roots=True, time_stamp=self.parent_ts + profiler = get_profiler() + + with profiler.profile("merge_apply_get_roots"): + root_ids = set( + self.cg.get_roots( + self.added_edges.ravel(), assert_roots=True, time_stamp=self.parent_ts + ) ) - ) if len(root_ids) < 2 and not self.allow_same_segment_merge: raise PreconditionError( "Supervoxels must belong to different objects." 
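
Starting the candidate batch at 4096 burns thousands of IDs per edit in the
common case where the very first fresh ID is already unused. Growing the batch
1 -> 2 -> 4 -> ... keeps the number of allocated IDs proportional to the
collisions actually observed, and the cap bounds any single request. A sketch
of the resulting allocation pattern (illustrative; allocate()/exists() stand
in for the id_client and storage calls):

    def first_unused_id(allocate, exists, cap=2**16):
        batch = 1
        while True:
            for cid in allocate(batch):   # `batch` fresh candidate IDs
                if not exists(cid):       # first ID with no existing row wins
                    return cid
            batch = min(batch * 2, cap)   # exponential growth, capped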
---
 pychunkedgraph/graph/edits.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 87718dd26..926a479b7 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -818,7 +818,7 @@ def _create_new_parents(self, layer: int):
             chunk_id = self.cg.get_parent_chunk_id(cc_ids[0], parent_layer)
             is_root = parent_layer == self.cg.meta.layer_count
-            batch_size = 4096
+            batch_size = 1
             parent = None
             with self._profiler.profile("create_and_verify_node_id"):
                 while parent is None:
@@ -831,7 +831,7 @@ def _create_new_parents(self, layer: int):
                             parent = cid
                             break
                     if parent is None:
-                        batch_size *= 2
+                        batch_size = min(batch_size * 2, 2**16)
 
             self._new_ids_d[parent_layer].append(parent)
             self._update_id_lineage(parent, cc_ids, layer, parent_layer)

From f71e52d466f1b0242d6ba6d7a4c6b62154190b4b Mon Sep 17 00:00:00 2001
From: Dodam Ih
Date: Sun, 25 Jan 2026 15:56:13 -0800
Subject: [PATCH 14/14] fix: filter children array during meshing
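
With ragged child counts, children_array is a 1-D object array whose elements
are per-node child arrays, so .astype(np.uint64) on the masked result relies
on implicit size-1-array-to-scalar conversion, which newer NumPy deprecates
and later rejects. Concatenating the filtered elements flattens them without
that conversion. A minimal reproduction (illustrative only):

    import numpy as np

    children_array = np.empty(2, dtype=object)
    children_array[0] = np.array([7], dtype=np.uint64)     # single-child node
    children_array[1] = np.array([8, 9], dtype=np.uint64)  # multi-child node
    mask = np.array([len(c) == 1 for c in children_array])

    picked = children_array[mask]
    only_children = (
        np.concatenate(picked).astype(np.uint64)  # safe flatten of array elements
        if picked.size
        else np.array([], dtype=np.uint64)
    )
    print(only_children)  # [7]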
---
 pychunkedgraph/meshing/meshgen_utils.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pychunkedgraph/meshing/meshgen_utils.py b/pychunkedgraph/meshing/meshgen_utils.py
index 711c09322..43e6f5c3a 100644
--- a/pychunkedgraph/meshing/meshgen_utils.py
+++ b/pychunkedgraph/meshing/meshgen_utils.py
@@ -129,7 +129,13 @@ def recursive_helper(cur_node_ids):
         only_child_mask = np.array(
             [len(children_for_node) == 1 for children_for_node in children_array]
         )
-        only_children = children_array[only_child_mask].astype(np.uint64).ravel()
+        # Extract children from object array - each filtered element is a 1-element array
+        filtered_children = children_array[only_child_mask]
+        only_children = (
+            np.concatenate(filtered_children).astype(np.uint64)
+            if filtered_children.size
+            else np.array([], dtype=np.uint64)
+        )
 
         if np.any(only_child_mask):
             temp_array = cur_node_ids[stop_layer_mask]
             temp_array[only_child_mask] = recursive_helper(only_children)