Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion bigframes/pandas/io/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ def _read_gbq_colab( # type: ignore[overload-overlap]
*,
pyformat_args: Optional[Dict[str, Any]] = ...,
dry_run: Literal[False] = ...,
use_hybrid_engine: bool = ...,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect you'll get unit tests failing because this parameter is in bigframes.pandas but not the session.

Now that we have the benchmarks going again, I think it'd be safe to remove the parameter.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, reverted.

) -> bigframes.dataframe.DataFrame:
...

Expand All @@ -289,6 +290,7 @@ def _read_gbq_colab(
*,
pyformat_args: Optional[Dict[str, Any]] = ...,
dry_run: Literal[True] = ...,
use_hybrid_engine: bool = ...,
) -> pandas.Series:
...

Expand All @@ -298,6 +300,7 @@ def _read_gbq_colab(
*,
pyformat_args: Optional[Dict[str, Any]] = None,
dry_run: bool = False,
use_hybrid_engine: bool = False,
) -> bigframes.dataframe.DataFrame | pandas.Series:
"""A Colab-specific version of read_gbq.

Expand All @@ -312,7 +315,9 @@ def _read_gbq_colab(
dry_run (bool):
If True, estimates the query results size without returning data.
The return will be a pandas Series with query metadata.

use_hybrid_engine (bool):
If True and no session has been started yet, the newly started
session will use hybrid execution, which pushes some execution to the local CPU.
Returns:
Union[bigframes.dataframe.DataFrame, pandas.Series]:
A BigQuery DataFrame if `dry_run` is False, otherwise a pandas Series.
Expand Down Expand Up @@ -345,6 +350,8 @@ def _read_gbq_colab(
dry_run=True,
)
_set_default_session_location_if_possible_deferred_query(create_query)
if use_hybrid_engine and not config.options.bigquery._session_started:
config.options.bigquery.enable_polars_execution = True

return global_session.with_default_session(
bigframes.session.Session._read_gbq_colab,
Expand Down
31 changes: 31 additions & 0 deletions tests/system/small/session/test_read_gbq_colab.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
import pandas.testing
import pytest

import bigframes
import bigframes.pandas

pytest.importorskip("polars")


def test_read_gbq_colab_to_pandas_batches_preserves_order_by(maybe_ordered_session):
# This query should return enough results to be too big to fit in a single
Expand Down Expand Up @@ -59,6 +64,32 @@ def test_read_gbq_colab_to_pandas_batches_preserves_order_by(maybe_ordered_sessi
assert executions_after == executions_before_python == executions_before_sql + 1


def test_read_gbq_colab_fresh_session_is_hybrid():
    """Verify that a fresh global session can be opted into hybrid execution.

    The test closes any existing global session, runs a query through
    ``bigframes.pandas._read_gbq_colab`` with ``use_hybrid_engine=True``, and
    then checks that:

    * the session's executor has polars execution enabled,
    * ``sort_values(...).peek(100)`` returns 100 rows, and
    * the session's execution count is 1 both before and after the peek,
      i.e. the peek did not add to the execution count.

    ``use_hybrid_engine=True`` flips the process-wide
    ``enable_polars_execution`` option and starts a global session, so the
    test restores both on exit to avoid leaking hybrid execution into
    whichever tests run next.
    """
    # The hybrid flag is only honoured when no global session has started,
    # so begin from a closed session.
    bigframes.close_session()
    # Remember the global setting so it can be put back afterwards.
    polars_option_before = bigframes.pandas.options.bigquery.enable_polars_execution
    try:
        # NOTE: the SQL literal is kept flush-left so the query text sent to
        # BigQuery stays exactly as it was.
        df = bigframes.pandas._read_gbq_colab(
            """
SELECT
name,
SUM(number) AS total
FROM
`bigquery-public-data.usa_names.usa_1910_2013`
WHERE state LIKE 'W%'
GROUP BY name
ORDER BY total DESC
LIMIT 300
""",
            use_hybrid_engine=True,
        )
        session = df._session
        executions_before_python = session._metrics.execution_count
        result = df.sort_values("name").peek(100)
        executions_after = session._metrics.execution_count

        assert len(result) == 100
        assert session._executor._enable_polars_execution is True  # type: ignore
        # The count is unchanged by the sort + peek and equals 1 for this
        # freshly created session.
        assert executions_after == executions_before_python == 1
    finally:
        # Close first: session-scoped options are presumably locked while a
        # session is running (TODO confirm), so the option is restored only
        # after the session is gone.
        bigframes.close_session()
        bigframes.pandas.options.bigquery.enable_polars_execution = (
            polars_option_before
        )


def test_read_gbq_colab_peek_avoids_requery(maybe_ordered_session):
executions_before_sql = maybe_ordered_session._metrics.execution_count
df = maybe_ordered_session._read_gbq_colab(
Expand Down