From 505921e7a73d38ed4dc4b88801af000337c4634a Mon Sep 17 00:00:00 2001 From: Anvit Tawar Date: Fri, 29 May 2026 00:40:02 +0000 Subject: [PATCH 1/6] feat: support row_range in sample_row_keys method --- .../cloud/bigtable/data/_async/client.py | 9 +++- .../bigtable/data/_sync_autogen/client.py | 9 +++- .../google/cloud/bigtable/table.py | 12 +++-- .../handlers/client_handler_data_async.py | 6 ++- .../client_handler_data_sync_autogen.py | 7 ++- .../handlers/client_handler_legacy.py | 11 ++++- .../tests/unit/data/_async/test_client.py | 24 ++++++++++ .../unit/data/_sync_autogen/test_client.py | 46 +++++++++++++------ .../tests/unit/v2_client/test_table.py | 27 +++++++++++ 9 files changed, 125 insertions(+), 26 deletions(-) diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/client.py b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/client.py index b2c13521240f..01eca124f2b0 100644 --- a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/client.py +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/client.py @@ -83,7 +83,7 @@ from google.cloud.bigtable.data.execute_query.values import ExecuteQueryValueType from google.cloud.bigtable.data.mutations import Mutation, RowMutationEntry from google.cloud.bigtable.data.read_modify_write_rules import ReadModifyWriteRule -from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery +from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery, RowRange from google.cloud.bigtable.data.row import Row from google.cloud.bigtable.data.row_filters import ( CellsRowLimitFilter, @@ -1389,6 +1389,7 @@ async def row_exists( async def sample_row_keys( self, *, + row_range: RowRange | None = None, operation_timeout: float | TABLE_DEFAULT = TABLE_DEFAULT.DEFAULT, attempt_timeout: float | None | TABLE_DEFAULT = TABLE_DEFAULT.DEFAULT, retryable_errors: Sequence[type[Exception]] @@ -1406,6 +1407,8 @@ async def sample_row_keys( row_keys, along with offset positions in the table Args: + row_range: the range of rows to sample. If not provided, samples the + entire table. operation_timeout: the time budget for the entire operation, in seconds. Failed requests will be retried within the budget.i Defaults to the Table's default_operation_timeout @@ -1443,7 +1446,9 @@ async def sample_row_keys( async def execute_rpc(): results = await self.client._gapic_client.sample_row_keys( request=SampleRowKeysRequest( - app_profile_id=self.app_profile_id, **self._request_path + app_profile_id=self.app_profile_id, + row_range=row_range._to_pb() if row_range else None, + **self._request_path ), timeout=next(attempt_timeout_gen), retry=None, diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/client.py b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/client.py index 9dc118de0289..c065ebc92aab 100644 --- a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/client.py +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/client.py @@ -84,7 +84,7 @@ from google.cloud.bigtable.data.execute_query.values import ExecuteQueryValueType from google.cloud.bigtable.data.mutations import Mutation, RowMutationEntry from google.cloud.bigtable.data.read_modify_write_rules import ReadModifyWriteRule -from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery +from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery, RowRange from google.cloud.bigtable.data.row import Row from google.cloud.bigtable.data.row_filters import ( CellsRowLimitFilter, @@ -1139,6 +1139,7 @@ def row_exists( def sample_row_keys( self, *, + row_range: RowRange | None = None, operation_timeout: float | TABLE_DEFAULT = TABLE_DEFAULT.DEFAULT, attempt_timeout: float | None | TABLE_DEFAULT = TABLE_DEFAULT.DEFAULT, retryable_errors: Sequence[type[Exception]] @@ -1155,6 +1156,8 @@ def sample_row_keys( row_keys, along with offset positions in the table Args: + row_range: the range of rows to sample. If not provided, samples the + entire table. operation_timeout: the time budget for the entire operation, in seconds. Failed requests will be retried within the budget.i Defaults to the Table's default_operation_timeout @@ -1187,7 +1190,9 @@ def sample_row_keys( def execute_rpc(): results = self.client._gapic_client.sample_row_keys( request=SampleRowKeysRequest( - app_profile_id=self.app_profile_id, **self._request_path + app_profile_id=self.app_profile_id, + row_range=row_range._to_pb() if row_range else None, + **self._request_path, ), timeout=next(attempt_timeout_gen), retry=None, diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/table.py b/packages/google-cloud-bigtable/google/cloud/bigtable/table.py index 1ded1d6f0948..d0022bdf97aa 100644 --- a/packages/google-cloud-bigtable/google/cloud/bigtable/table.py +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/table.py @@ -739,7 +739,7 @@ def mutate_rows(self, rows, retry=DEFAULT_RETRY, timeout=DEFAULT): ) return retryable_mutate_rows(retry=retry) - def sample_row_keys(self): + def sample_row_keys(self, row_range=None): """Read a sample of row keys in the table. For example: @@ -772,15 +772,19 @@ def sample_row_keys(self): samples would require space roughly equal to the difference in their ``offset_bytes`` fields. + :type row_range: :class:`~google.cloud.bigtable.row_set.RowRange` + :param row_range: (Optional) Row range to restrict the sample to. + :rtype: :class:`~google.cloud.exceptions.GrpcRendezvous` :returns: A cancel-able iterator. Can be consumed by calling ``next()`` or by casting to a :class:`list` and can be cancelled by calling ``cancel()``. """ data_client = self._instance._client.table_data_client - response_iterator = data_client.sample_row_keys( - request={"table_name": self.name, "app_profile_id": self._app_profile_id} - ) + request = {"table_name": self.name, "app_profile_id": self._app_profile_id} + if row_range is not None: + request["row_range"] = row_range.get_range_kwargs() + response_iterator = data_client.sample_row_keys(request=request) return response_iterator diff --git a/packages/google-cloud-bigtable/test_proxy/handlers/client_handler_data_async.py b/packages/google-cloud-bigtable/test_proxy/handlers/client_handler_data_async.py index 246b7fcd70cc..38084e991514 100644 --- a/packages/google-cloud-bigtable/test_proxy/handlers/client_handler_data_async.py +++ b/packages/google-cloud-bigtable/test_proxy/handlers/client_handler_data_async.py @@ -250,7 +250,11 @@ async def SampleRowKeys(self, request, **kwargs): kwargs["operation_timeout"] = ( kwargs.get("operation_timeout", self.per_operation_timeout) or 20 ) - result = CrossSync.rm_aio(await table.sample_row_keys(**kwargs)) + row_range = None + if "row_range" in request: + from google.cloud.bigtable.data.read_rows_query import RowRange + row_range = RowRange._from_dict(request["row_range"]) + result = CrossSync.rm_aio(await table.sample_row_keys(row_range=row_range, **kwargs)) return result @error_safe diff --git a/packages/google-cloud-bigtable/test_proxy/handlers/client_handler_data_sync_autogen.py b/packages/google-cloud-bigtable/test_proxy/handlers/client_handler_data_sync_autogen.py index b2864db94b21..869014be0598 100644 --- a/packages/google-cloud-bigtable/test_proxy/handlers/client_handler_data_sync_autogen.py +++ b/packages/google-cloud-bigtable/test_proxy/handlers/client_handler_data_sync_autogen.py @@ -187,7 +187,12 @@ async def SampleRowKeys(self, request, **kwargs): kwargs["operation_timeout"] = ( kwargs.get("operation_timeout", self.per_operation_timeout) or 20 ) - result = table.sample_row_keys(**kwargs) + row_range = None + if "row_range" in request: + from google.cloud.bigtable.data.read_rows_query import RowRange + + row_range = RowRange._from_dict(request["row_range"]) + result = table.sample_row_keys(row_range=row_range, **kwargs) return result @error_safe diff --git a/packages/google-cloud-bigtable/test_proxy/handlers/client_handler_legacy.py b/packages/google-cloud-bigtable/test_proxy/handlers/client_handler_legacy.py index 63fe357b0b33..78c1af244003 100644 --- a/packages/google-cloud-bigtable/test_proxy/handlers/client_handler_legacy.py +++ b/packages/google-cloud-bigtable/test_proxy/handlers/client_handler_legacy.py @@ -230,6 +230,15 @@ async def SampleRowKeys(self, request, **kwargs): table_id = request["table_name"].split("/")[-1] instance = self.client.instance(self.instance_id) table = instance.table(table_id) - response = list(table.sample_row_keys()) + row_range = None + if "row_range" in request: + from google.cloud.bigtable.row_set import RowRange + rr_dict = request["row_range"] + start_key = rr_dict.get("start_key_closed") or rr_dict.get("start_key_open") + start_inclusive = "start_key_closed" in rr_dict + end_key = rr_dict.get("end_key_closed") or rr_dict.get("end_key_open") + end_inclusive = "end_key_closed" in rr_dict + row_range = RowRange(start_key, end_key, start_inclusive, end_inclusive) + response = list(table.sample_row_keys(row_range=row_range)) tuple_response = [(s.row_key, s.offset_bytes) for s in response] return tuple_response diff --git a/packages/google-cloud-bigtable/tests/unit/data/_async/test_client.py b/packages/google-cloud-bigtable/tests/unit/data/_async/test_client.py index 6c6719615c40..5ad8c707a533 100644 --- a/packages/google-cloud-bigtable/tests/unit/data/_async/test_client.py +++ b/packages/google-cloud-bigtable/tests/unit/data/_async/test_client.py @@ -2392,6 +2392,30 @@ async def test_sample_row_keys(self): assert result[1] == samples[1] assert result[2] == samples[2] + @CrossSync.pytest + async def test_sample_row_keys_w_row_range(self): + """ + Test that method returns the expected key samples when row_range is provided + """ + samples = [ + (b"test_1", 0), + (b"test_2", 100), + (b"test_3", 200), + ] + from google.cloud.bigtable.data import RowRange + row_range = RowRange(start_key=b"a", end_key=b"b") + async with self._make_client() as client: + async with client.get_table("instance", "table") as table: + with mock.patch.object( + table.client._gapic_client, "sample_row_keys", CrossSync.Mock() + ) as sample_row_keys: + sample_row_keys.return_value = self._make_gapic_stream(samples) + result = await table.sample_row_keys(row_range=row_range) + assert len(result) == 3 + sample_row_keys.assert_called_once() + called_request = sample_row_keys.call_args[1]["request"] + assert called_request.row_range == row_range._to_pb() + @CrossSync.pytest async def test_sample_row_keys_bad_timeout(self): """ diff --git a/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_client.py b/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_client.py index 79ad903b6191..957afb3314a4 100644 --- a/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_client.py +++ b/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_client.py @@ -208,10 +208,8 @@ def test__start_background_channel_refresh(self): def test__ping_and_warm_instances(self): """test ping and warm with mocked asyncio.gather""" client_mock = mock.Mock() - client_mock._execute_ping_and_warms = ( - lambda *args: self._get_target_class()._execute_ping_and_warms( - client_mock, *args - ) + client_mock._execute_ping_and_warms = lambda *args: ( + self._get_target_class()._execute_ping_and_warms(client_mock, *args) ) with mock.patch.object( CrossSync._Sync_Impl, "gather_partials", CrossSync._Sync_Impl.Mock() @@ -254,10 +252,8 @@ def test__ping_and_warm_instances(self): def test__ping_and_warm_single_instance(self): """should be able to call ping and warm with single instance""" client_mock = mock.Mock() - client_mock._execute_ping_and_warms = ( - lambda *args: self._get_target_class()._execute_ping_and_warms( - client_mock, *args - ) + client_mock._execute_ping_and_warms = lambda *args: ( + self._get_target_class()._execute_ping_and_warms(client_mock, *args) ) with mock.patch.object( CrossSync._Sync_Impl, "gather_partials", CrossSync._Sync_Impl.Mock() @@ -1326,11 +1322,11 @@ def _make_client(self, *args, **kwargs): def _make_table(self, *args, **kwargs): client_mock = mock.Mock() - client_mock._register_instance.side_effect = ( - lambda *args, **kwargs: CrossSync._Sync_Impl.yield_to_event_loop() + client_mock._register_instance.side_effect = lambda *args, **kwargs: ( + CrossSync._Sync_Impl.yield_to_event_loop() ) - client_mock._remove_instance_registration.side_effect = ( - lambda *args, **kwargs: CrossSync._Sync_Impl.yield_to_event_loop() + client_mock._remove_instance_registration.side_effect = lambda *args, **kwargs: ( + CrossSync._Sync_Impl.yield_to_event_loop() ) kwargs["instance_id"] = kwargs.get( "instance_id", args[0] if args else "instance" @@ -1792,9 +1788,8 @@ def test_read_rows_sharded_multiple_queries(self): with mock.patch.object( table.client._gapic_client, "read_rows" ) as read_rows: - read_rows.side_effect = ( - lambda *args, - **kwargs: CrossSync._Sync_Impl.TestReadRows._make_gapic_stream( + read_rows.side_effect = lambda *args, **kwargs: ( + CrossSync._Sync_Impl.TestReadRows._make_gapic_stream( [ CrossSync._Sync_Impl.TestReadRows._make_chunk(row_key=k) for k in args[0].rows.row_keys @@ -1998,6 +1993,26 @@ def test_sample_row_keys(self): assert result[1] == samples[1] assert result[2] == samples[2] + def test_sample_row_keys_w_row_range(self): + """Test that method returns the expected key samples when row_range is provided""" + samples = [(b"test_1", 0), (b"test_2", 100), (b"test_3", 200)] + from google.cloud.bigtable.data import RowRange + + row_range = RowRange(start_key=b"a", end_key=b"b") + with self._make_client() as client: + with client.get_table("instance", "table") as table: + with mock.patch.object( + table.client._gapic_client, + "sample_row_keys", + CrossSync._Sync_Impl.Mock(), + ) as sample_row_keys: + sample_row_keys.return_value = self._make_gapic_stream(samples) + result = table.sample_row_keys(row_range=row_range) + assert len(result) == 3 + sample_row_keys.assert_called_once() + called_request = sample_row_keys.call_args[1]["request"] + assert called_request.row_range == row_range._to_pb() + def test_sample_row_keys_bad_timeout(self): """should raise error if timeout is negative""" with self._make_client() as client: @@ -2883,6 +2898,7 @@ def prepare_mock(self, client): yield prepare_mock def _make_gapic_stream(self, sample_list: list["ExecuteQueryResponse" | Exception]): + class MockStream: def __init__(self, sample_list): self.sample_list = sample_list diff --git a/packages/google-cloud-bigtable/tests/unit/v2_client/test_table.py b/packages/google-cloud-bigtable/tests/unit/v2_client/test_table.py index 882fbbc413d0..4029726cb2c3 100644 --- a/packages/google-cloud-bigtable/tests/unit/v2_client/test_table.py +++ b/packages/google-cloud-bigtable/tests/unit/v2_client/test_table.py @@ -1175,6 +1175,33 @@ def test_table_sample_row_keys(): assert result[0] == response_iterator +def test_table_sample_row_keys_w_row_range(): + credentials = _make_credentials() + client = _make_client(project="project-id", credentials=credentials, admin=True) + instance = client.instance(instance_id=INSTANCE_ID) + table = _make_table(TABLE_ID, instance) + response_iterator = object() + + data_api = client._table_data_client = _make_data_api() + data_api.sample_row_keys.return_value = [response_iterator] + + from google.cloud.bigtable.row_set import RowRange + row_range = RowRange(start_key=b"a", end_key=b"b", start_inclusive=True, end_inclusive=False) + result = table.sample_row_keys(row_range=row_range) + + assert result[0] == response_iterator + data_api.sample_row_keys.assert_called_once_with( + request={ + "table_name": table.name, + "app_profile_id": table._app_profile_id, + "row_range": { + "start_key_closed": b"a", + "end_key_open": b"b", + } + } + ) + + def test_table_truncate(): credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) From 5c89eb0491216a5b17d634ce132fec46aa520fa5 Mon Sep 17 00:00:00 2001 From: Anvit Tawar Date: Mon, 1 Jun 2026 21:34:00 +0000 Subject: [PATCH 2/6] feat: update sample_row_keys docstrings and unit tests - In Table.sample_row_keys, updated the docstring to clarify that the end key of the provided row range is always returned as the last sample (or empty string if no range is specified), and updated the offset_bytes description to specify it includes preceding rows but after the last sample before the range. - In async and sync unit tests, updated the mocked SampleRowKeys stream responses to use realistic keys falling inside the RowRange bounds (e.g. b"a_key1" and b"b"), and updated the key/length assertions accordingly. - Reverted manual GAPIC type patches to let OwlBot/Librarian auto-patching handle the API class fields natively upon proto submission. TAG=agy CONV=6cea44b5-6adb-45c6-b8c8-58b37f95584c --- .../google/cloud/bigtable/table.py | 21 +++++++++++++------ .../tests/unit/data/_async/test_client.py | 9 ++++---- .../unit/data/_sync_autogen/test_client.py | 6 ++++-- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/table.py b/packages/google-cloud-bigtable/google/cloud/bigtable/table.py index d0022bdf97aa..df5ea21d3446 100644 --- a/packages/google-cloud-bigtable/google/cloud/bigtable/table.py +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/table.py @@ -755,10 +755,13 @@ def sample_row_keys(self, row_range=None): The elements in the iterator are a SampleRowKeys response and they have the properties ``offset_bytes`` and ``row_key``. They occur in sorted - order. The table might have contents before the first row key in the - list and after the last one, but a key containing the empty string - indicates "end of table" and will be the last response given, if - present. + order. The returned keys in the sorted stream sequence are restricted to the + ``row_range`` if specified in the request. + The table might have contents before the first row key in the + list and after the last one, but the ``end_key`` of the provided + ``row_range`` is always the last response given. If no ``row_range`` is + provided, a key containing the empty string will be the last response, + indicating "end of table". .. note:: @@ -768,12 +771,18 @@ def sample_row_keys(self, row_range=None): The ``offset_bytes`` field on a response indicates the approximate total storage space used by all rows in the table which precede - ``row_key``. Buffering the contents of all rows between two subsequent + ``row_key`` (and if a row-range is specified in the request, which + follow what would have been the previous sample before the row-range + start). Buffering the contents of all rows between two subsequent samples would require space roughly equal to the difference in their ``offset_bytes`` fields. :type row_range: :class:`~google.cloud.bigtable.row_set.RowRange` - :param row_range: (Optional) Row range to restrict the sample to. + :param row_range: + (Optional) Row range to restrict the sample to. If a ``row_range`` is + provided, the returned samples will be restricted to the specified + range. The output will always return the end key in the range as the + last sample returned. :rtype: :class:`~google.cloud.exceptions.GrpcRendezvous` :returns: A cancel-able iterator. Can be consumed by calling ``next()`` diff --git a/packages/google-cloud-bigtable/tests/unit/data/_async/test_client.py b/packages/google-cloud-bigtable/tests/unit/data/_async/test_client.py index 5ad8c707a533..45bfa0320794 100644 --- a/packages/google-cloud-bigtable/tests/unit/data/_async/test_client.py +++ b/packages/google-cloud-bigtable/tests/unit/data/_async/test_client.py @@ -2398,9 +2398,8 @@ async def test_sample_row_keys_w_row_range(self): Test that method returns the expected key samples when row_range is provided """ samples = [ - (b"test_1", 0), - (b"test_2", 100), - (b"test_3", 200), + (b"a_key1", 100), + (b"b", 200), ] from google.cloud.bigtable.data import RowRange row_range = RowRange(start_key=b"a", end_key=b"b") @@ -2411,7 +2410,9 @@ async def test_sample_row_keys_w_row_range(self): ) as sample_row_keys: sample_row_keys.return_value = self._make_gapic_stream(samples) result = await table.sample_row_keys(row_range=row_range) - assert len(result) == 3 + assert len(result) == 2 + assert result[0] == samples[0] + assert result[1] == samples[1] sample_row_keys.assert_called_once() called_request = sample_row_keys.call_args[1]["request"] assert called_request.row_range == row_range._to_pb() diff --git a/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_client.py b/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_client.py index 957afb3314a4..c22683503f43 100644 --- a/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_client.py +++ b/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_client.py @@ -1995,7 +1995,7 @@ def test_sample_row_keys(self): def test_sample_row_keys_w_row_range(self): """Test that method returns the expected key samples when row_range is provided""" - samples = [(b"test_1", 0), (b"test_2", 100), (b"test_3", 200)] + samples = [(b"a_key1", 100), (b"b", 200)] from google.cloud.bigtable.data import RowRange row_range = RowRange(start_key=b"a", end_key=b"b") @@ -2008,7 +2008,9 @@ def test_sample_row_keys_w_row_range(self): ) as sample_row_keys: sample_row_keys.return_value = self._make_gapic_stream(samples) result = table.sample_row_keys(row_range=row_range) - assert len(result) == 3 + assert len(result) == 2 + assert result[0] == samples[0] + assert result[1] == samples[1] sample_row_keys.assert_called_once() called_request = sample_row_keys.call_args[1]["request"] assert called_request.row_range == row_range._to_pb() From ce74362537e5d5b9c18da67e1489ba129bc47df8 Mon Sep 17 00:00:00 2001 From: Anvit Tawar Date: Tue, 2 Jun 2026 17:54:47 +0000 Subject: [PATCH 3/6] revert: remove legacy client changes for row_range in sample_row_keys Reverts the recently implemented row_range support from the legacy client code to keep it scoped cleanly to modern clients only: - Reverted Table.sample_row_keys signature, docstrings, and implementation in google/cloud/bigtable/table.py. - Reverted row_range parameter parsing in legacy test proxy client handler. - Removed unit test test_table_sample_row_keys_w_row_range in tests/unit/v2_client/test_table.py. TAG=agy CONV=6cea44b5-6adb-45c6-b8c8-58b37f95584c --- .../google/cloud/bigtable/table.py | 30 +++++-------------- .../handlers/client_handler_legacy.py | 11 +------ .../tests/unit/v2_client/test_table.py | 26 ---------------- 3 files changed, 9 insertions(+), 58 deletions(-) diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/table.py b/packages/google-cloud-bigtable/google/cloud/bigtable/table.py index df5ea21d3446..b32fe210d372 100644 --- a/packages/google-cloud-bigtable/google/cloud/bigtable/table.py +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/table.py @@ -739,7 +739,7 @@ def mutate_rows(self, rows, retry=DEFAULT_RETRY, timeout=DEFAULT): ) return retryable_mutate_rows(retry=retry) - def sample_row_keys(self, row_range=None): + def sample_row_keys(self): """Read a sample of row keys in the table. For example: @@ -755,13 +755,9 @@ def sample_row_keys(self, row_range=None): The elements in the iterator are a SampleRowKeys response and they have the properties ``offset_bytes`` and ``row_key``. They occur in sorted - order. The returned keys in the sorted stream sequence are restricted to the - ``row_range`` if specified in the request. - The table might have contents before the first row key in the - list and after the last one, but the ``end_key`` of the provided - ``row_range`` is always the last response given. If no ``row_range`` is - provided, a key containing the empty string will be the last response, - indicating "end of table". + order. The table might have contents before the first row key in the + list and after the last one, but a key containing the empty string + indicates "end of table" and will be the last response given, if present. .. note:: @@ -771,29 +767,19 @@ def sample_row_keys(self, row_range=None): The ``offset_bytes`` field on a response indicates the approximate total storage space used by all rows in the table which precede - ``row_key`` (and if a row-range is specified in the request, which - follow what would have been the previous sample before the row-range - start). Buffering the contents of all rows between two subsequent + ``row_key``. Buffering the contents of all rows between two subsequent samples would require space roughly equal to the difference in their ``offset_bytes`` fields. - :type row_range: :class:`~google.cloud.bigtable.row_set.RowRange` - :param row_range: - (Optional) Row range to restrict the sample to. If a ``row_range`` is - provided, the returned samples will be restricted to the specified - range. The output will always return the end key in the range as the - last sample returned. - :rtype: :class:`~google.cloud.exceptions.GrpcRendezvous` :returns: A cancel-able iterator. Can be consumed by calling ``next()`` or by casting to a :class:`list` and can be cancelled by calling ``cancel()``. """ data_client = self._instance._client.table_data_client - request = {"table_name": self.name, "app_profile_id": self._app_profile_id} - if row_range is not None: - request["row_range"] = row_range.get_range_kwargs() - response_iterator = data_client.sample_row_keys(request=request) + response_iterator = data_client.sample_row_keys( + request={"table_name": self.name, "app_profile_id": self._app_profile_id} + ) return response_iterator diff --git a/packages/google-cloud-bigtable/test_proxy/handlers/client_handler_legacy.py b/packages/google-cloud-bigtable/test_proxy/handlers/client_handler_legacy.py index 78c1af244003..63fe357b0b33 100644 --- a/packages/google-cloud-bigtable/test_proxy/handlers/client_handler_legacy.py +++ b/packages/google-cloud-bigtable/test_proxy/handlers/client_handler_legacy.py @@ -230,15 +230,6 @@ async def SampleRowKeys(self, request, **kwargs): table_id = request["table_name"].split("/")[-1] instance = self.client.instance(self.instance_id) table = instance.table(table_id) - row_range = None - if "row_range" in request: - from google.cloud.bigtable.row_set import RowRange - rr_dict = request["row_range"] - start_key = rr_dict.get("start_key_closed") or rr_dict.get("start_key_open") - start_inclusive = "start_key_closed" in rr_dict - end_key = rr_dict.get("end_key_closed") or rr_dict.get("end_key_open") - end_inclusive = "end_key_closed" in rr_dict - row_range = RowRange(start_key, end_key, start_inclusive, end_inclusive) - response = list(table.sample_row_keys(row_range=row_range)) + response = list(table.sample_row_keys()) tuple_response = [(s.row_key, s.offset_bytes) for s in response] return tuple_response diff --git a/packages/google-cloud-bigtable/tests/unit/v2_client/test_table.py b/packages/google-cloud-bigtable/tests/unit/v2_client/test_table.py index 4029726cb2c3..6c6b3a0f25bc 100644 --- a/packages/google-cloud-bigtable/tests/unit/v2_client/test_table.py +++ b/packages/google-cloud-bigtable/tests/unit/v2_client/test_table.py @@ -1175,32 +1175,6 @@ def test_table_sample_row_keys(): assert result[0] == response_iterator -def test_table_sample_row_keys_w_row_range(): - credentials = _make_credentials() - client = _make_client(project="project-id", credentials=credentials, admin=True) - instance = client.instance(instance_id=INSTANCE_ID) - table = _make_table(TABLE_ID, instance) - response_iterator = object() - - data_api = client._table_data_client = _make_data_api() - data_api.sample_row_keys.return_value = [response_iterator] - - from google.cloud.bigtable.row_set import RowRange - row_range = RowRange(start_key=b"a", end_key=b"b", start_inclusive=True, end_inclusive=False) - result = table.sample_row_keys(row_range=row_range) - - assert result[0] == response_iterator - data_api.sample_row_keys.assert_called_once_with( - request={ - "table_name": table.name, - "app_profile_id": table._app_profile_id, - "row_range": { - "start_key_closed": b"a", - "end_key_open": b"b", - } - } - ) - def test_table_truncate(): credentials = _make_credentials() From 3cd73e8669bccf8de07305c4e1f24cbf65268422 Mon Sep 17 00:00:00 2001 From: Anvit Tawar Date: Tue, 2 Jun 2026 18:23:44 +0000 Subject: [PATCH 4/6] fix: use explicit is not None check for row_range in client Updates the row_range check in both async and sync clients to use an explicit `is not None` check rather than truthiness evaluation. TAG=agy CONV=6cea44b5-6adb-45c6-b8c8-58b37f95584c --- .../google/cloud/bigtable/data/_async/client.py | 2 +- .../google/cloud/bigtable/data/_sync_autogen/client.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/client.py b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/client.py index 01eca124f2b0..420900a96c7b 100644 --- a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/client.py +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/client.py @@ -1447,7 +1447,7 @@ async def execute_rpc(): results = await self.client._gapic_client.sample_row_keys( request=SampleRowKeysRequest( app_profile_id=self.app_profile_id, - row_range=row_range._to_pb() if row_range else None, + row_range=row_range._to_pb() if row_range is not None else None, **self._request_path ), timeout=next(attempt_timeout_gen), diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/client.py b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/client.py index c065ebc92aab..f89373718cc9 100644 --- a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/client.py +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/client.py @@ -1191,7 +1191,7 @@ def execute_rpc(): results = self.client._gapic_client.sample_row_keys( request=SampleRowKeysRequest( app_profile_id=self.app_profile_id, - row_range=row_range._to_pb() if row_range else None, + row_range=row_range._to_pb() if row_range is not None else None, **self._request_path, ), timeout=next(attempt_timeout_gen), From 738dcebe0ffe65229f60dc5eb3b7e13a778c9988 Mon Sep 17 00:00:00 2001 From: Anvit Tawar Date: Tue, 2 Jun 2026 18:28:28 +0000 Subject: [PATCH 5/6] revert: discard formatting-only changes in legacy table module Restores packages/google-cloud-bigtable/google/cloud/bigtable/table.py and packages/google-cloud-bigtable/tests/unit/v2_client/test_table.py back to their original main branch states to eliminate redundant diffs from the PR. TAG=agy CONV=6cea44b5-6adb-45c6-b8c8-58b37f95584c --- packages/google-cloud-bigtable/google/cloud/bigtable/table.py | 3 ++- .../google-cloud-bigtable/tests/unit/v2_client/test_table.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/table.py b/packages/google-cloud-bigtable/google/cloud/bigtable/table.py index b32fe210d372..1ded1d6f0948 100644 --- a/packages/google-cloud-bigtable/google/cloud/bigtable/table.py +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/table.py @@ -757,7 +757,8 @@ def sample_row_keys(self): the properties ``offset_bytes`` and ``row_key``. They occur in sorted order. The table might have contents before the first row key in the list and after the last one, but a key containing the empty string - indicates "end of table" and will be the last response given, if present. + indicates "end of table" and will be the last response given, if + present. .. note:: diff --git a/packages/google-cloud-bigtable/tests/unit/v2_client/test_table.py b/packages/google-cloud-bigtable/tests/unit/v2_client/test_table.py index 6c6b3a0f25bc..882fbbc413d0 100644 --- a/packages/google-cloud-bigtable/tests/unit/v2_client/test_table.py +++ b/packages/google-cloud-bigtable/tests/unit/v2_client/test_table.py @@ -1175,7 +1175,6 @@ def test_table_sample_row_keys(): assert result[0] == response_iterator - def test_table_truncate(): credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) From bcada0ea0dfbdf03c1a64cd1b4caaea1c13385d0 Mon Sep 17 00:00:00 2001 From: Anvit Tawar Date: Tue, 2 Jun 2026 19:28:27 +0000 Subject: [PATCH 6/6] style: format async client and tests with ruff 0.14.14 Applies `ruff format` using matching version 0.14.14 to resolve style failures on the PR, ensuring only minimal necessary style corrections are made. TAG=agy CONV=6cea44b5-6adb-45c6-b8c8-58b37f95584c --- .../cloud/bigtable/data/_async/client.py | 2 +- .../tests/unit/data/_async/test_client.py | 1 + .../unit/data/_sync_autogen/test_client.py | 26 +++++++++++-------- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/client.py b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/client.py index 420900a96c7b..a47eabeb9994 100644 --- a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/client.py +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/client.py @@ -1448,7 +1448,7 @@ async def execute_rpc(): request=SampleRowKeysRequest( app_profile_id=self.app_profile_id, row_range=row_range._to_pb() if row_range is not None else None, - **self._request_path + **self._request_path, ), timeout=next(attempt_timeout_gen), retry=None, diff --git a/packages/google-cloud-bigtable/tests/unit/data/_async/test_client.py b/packages/google-cloud-bigtable/tests/unit/data/_async/test_client.py index 45bfa0320794..2dfe50444263 100644 --- a/packages/google-cloud-bigtable/tests/unit/data/_async/test_client.py +++ b/packages/google-cloud-bigtable/tests/unit/data/_async/test_client.py @@ -2402,6 +2402,7 @@ async def test_sample_row_keys_w_row_range(self): (b"b", 200), ] from google.cloud.bigtable.data import RowRange + row_range = RowRange(start_key=b"a", end_key=b"b") async with self._make_client() as client: async with client.get_table("instance", "table") as table: diff --git a/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_client.py b/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_client.py index c22683503f43..6d061d16147b 100644 --- a/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_client.py +++ b/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_client.py @@ -208,8 +208,10 @@ def test__start_background_channel_refresh(self): def test__ping_and_warm_instances(self): """test ping and warm with mocked asyncio.gather""" client_mock = mock.Mock() - client_mock._execute_ping_and_warms = lambda *args: ( - self._get_target_class()._execute_ping_and_warms(client_mock, *args) + client_mock._execute_ping_and_warms = ( + lambda *args: self._get_target_class()._execute_ping_and_warms( + client_mock, *args + ) ) with mock.patch.object( CrossSync._Sync_Impl, "gather_partials", CrossSync._Sync_Impl.Mock() @@ -252,8 +254,10 @@ def test__ping_and_warm_instances(self): def test__ping_and_warm_single_instance(self): """should be able to call ping and warm with single instance""" client_mock = mock.Mock() - client_mock._execute_ping_and_warms = lambda *args: ( - self._get_target_class()._execute_ping_and_warms(client_mock, *args) + client_mock._execute_ping_and_warms = ( + lambda *args: self._get_target_class()._execute_ping_and_warms( + client_mock, *args + ) ) with mock.patch.object( CrossSync._Sync_Impl, "gather_partials", CrossSync._Sync_Impl.Mock() @@ -1322,11 +1326,11 @@ def _make_client(self, *args, **kwargs): def _make_table(self, *args, **kwargs): client_mock = mock.Mock() - client_mock._register_instance.side_effect = lambda *args, **kwargs: ( - CrossSync._Sync_Impl.yield_to_event_loop() + client_mock._register_instance.side_effect = ( + lambda *args, **kwargs: CrossSync._Sync_Impl.yield_to_event_loop() ) - client_mock._remove_instance_registration.side_effect = lambda *args, **kwargs: ( - CrossSync._Sync_Impl.yield_to_event_loop() + client_mock._remove_instance_registration.side_effect = ( + lambda *args, **kwargs: CrossSync._Sync_Impl.yield_to_event_loop() ) kwargs["instance_id"] = kwargs.get( "instance_id", args[0] if args else "instance" @@ -1788,8 +1792,9 @@ def test_read_rows_sharded_multiple_queries(self): with mock.patch.object( table.client._gapic_client, "read_rows" ) as read_rows: - read_rows.side_effect = lambda *args, **kwargs: ( - CrossSync._Sync_Impl.TestReadRows._make_gapic_stream( + read_rows.side_effect = ( + lambda *args, + **kwargs: CrossSync._Sync_Impl.TestReadRows._make_gapic_stream( [ CrossSync._Sync_Impl.TestReadRows._make_chunk(row_key=k) for k in args[0].rows.row_keys @@ -2900,7 +2905,6 @@ def prepare_mock(self, client): yield prepare_mock def _make_gapic_stream(self, sample_list: list["ExecuteQueryResponse" | Exception]): - class MockStream: def __init__(self, sample_list): self.sample_list = sample_list