Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 6304519

Browse files
authored
Merge branch 'main' into dbt-blog-samples
2 parents 3e3cd67 + f7f686c commit 6304519

11 files changed

Lines changed: 220 additions & 7 deletions

File tree

CHANGELOG.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,35 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.15.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.14.0...v2.15.0) (2025-08-11)
8+
9+
10+
### Features
11+
12+
* Add `st_buffer`, `st_centroid`, and `st_convexhull` and their corresponding GeoSeries methods ([#1963](https://github.com/googleapis/python-bigquery-dataframes/issues/1963)) ([c4c7fa5](https://github.com/googleapis/python-bigquery-dataframes/commit/c4c7fa578e135e7f0e31ad3063db379514957acc))
13+
* Add first, last support to GroupBy ([#1969](https://github.com/googleapis/python-bigquery-dataframes/issues/1969)) ([41dda88](https://github.com/googleapis/python-bigquery-dataframes/commit/41dda889860c0ed8ca2eab81b34a9d71372c69f7))
14+
* Add value_counts to GroupBy classes ([#1974](https://github.com/googleapis/python-bigquery-dataframes/issues/1974)) ([82175a4](https://github.com/googleapis/python-bigquery-dataframes/commit/82175a4d0fa41d8aee11efdf8778a21bb70b1c0f))
15+
* Allow callable as a conditional or replacement input in DataFrame.where ([#1971](https://github.com/googleapis/python-bigquery-dataframes/issues/1971)) ([a8d57d2](https://github.com/googleapis/python-bigquery-dataframes/commit/a8d57d2f7075158eff69ec65a14c232756ab72a6))
16+
* Can cast locally in hybrid engine ([#1944](https://github.com/googleapis/python-bigquery-dataframes/issues/1944)) ([d9bc4a5](https://github.com/googleapis/python-bigquery-dataframes/commit/d9bc4a5940e9930d5e3c3bfffdadd2f91f96b53b))
17+
* Df.join lsuffix and rsuffix support ([#1857](https://github.com/googleapis/python-bigquery-dataframes/issues/1857)) ([26515c3](https://github.com/googleapis/python-bigquery-dataframes/commit/26515c34c4f0a5e4602d2f59bf229d41e0fc9196))
18+
19+
20+
### Bug Fixes
21+
22+
* Add warnings for duplicated or conflicting type hints in bigframes ([#1956](https://github.com/googleapis/python-bigquery-dataframes/issues/1956)) ([d38e42c](https://github.com/googleapis/python-bigquery-dataframes/commit/d38e42ce689e65f57223e9a8b14c4262cba08966))
23+
* Make `remote_function` more robust when there are `create_function` retries ([#1973](https://github.com/googleapis/python-bigquery-dataframes/issues/1973)) ([cd954ac](https://github.com/googleapis/python-bigquery-dataframes/commit/cd954ac07ad5e5820a20b941d3c6cab7cfcc1f29))
24+
* Make ExecutionMetrics stats tracking more robust to missing stats ([#1977](https://github.com/googleapis/python-bigquery-dataframes/issues/1977)) ([feb3ff4](https://github.com/googleapis/python-bigquery-dataframes/commit/feb3ff4b543eb8acbf6adf335b67a266a1cf4297))
25+
26+
27+
### Performance Improvements
28+
29+
* Remove an unnecessary extra `dry_run` query from `read_gbq_table` ([#1972](https://github.com/googleapis/python-bigquery-dataframes/issues/1972)) ([d17b711](https://github.com/googleapis/python-bigquery-dataframes/commit/d17b711750d281ef3efd42c160f3784cd60021ae))
30+
31+
32+
### Documentation
33+
34+
* Divide BQ DataFrames quickstart code cell ([#1975](https://github.com/googleapis/python-bigquery-dataframes/issues/1975)) ([fedb8f2](https://github.com/googleapis/python-bigquery-dataframes/commit/fedb8f23120aa315c7e9dd6f1bf1255ccf1ebc48))
35+
736
## [2.14.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.13.0...v2.14.0) (2025-08-05)
837

938

bigframes/core/compile/polars/compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ def compile_op(self, op: ops.ScalarOp, *args: pl.Expr) -> pl.Expr:
168168

169169
@compile_op.register(gen_ops.InvertOp)
170170
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
171-
return ~input
171+
return input.not_()
172172

173173
@compile_op.register(num_ops.AbsOp)
174174
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:

bigframes/core/compile/polars/lowering.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,18 @@
1414

1515
import dataclasses
1616

17+
import numpy as np
18+
1719
from bigframes import dtypes
1820
from bigframes.core import bigframe_node, expression
1921
from bigframes.core.rewrite import op_lowering
20-
from bigframes.operations import comparison_ops, datetime_ops, json_ops, numeric_ops
22+
from bigframes.operations import (
23+
comparison_ops,
24+
datetime_ops,
25+
generic_ops,
26+
json_ops,
27+
numeric_ops,
28+
)
2129
import bigframes.operations as ops
2230

2331
# TODO: Would be more precise to actually have separate op set for polars ops (where they diverge from the original ops)
@@ -288,6 +296,26 @@ def lower(self, expr: expression.OpExpression) -> expression.Expression:
288296
return _lower_cast(expr.op, expr.inputs[0])
289297

290298

299+
def invert_bytes(byte_string):
300+
inverted_bytes = ~np.frombuffer(byte_string, dtype=np.uint8)
301+
return inverted_bytes.tobytes()
302+
303+
304+
class LowerInvertOp(op_lowering.OpLoweringRule):
305+
@property
306+
def op(self) -> type[ops.ScalarOp]:
307+
return generic_ops.InvertOp
308+
309+
def lower(self, expr: expression.OpExpression) -> expression.Expression:
310+
assert isinstance(expr.op, generic_ops.InvertOp)
311+
arg = expr.children[0]
312+
if arg.output_type == dtypes.BYTES_DTYPE:
313+
return generic_ops.PyUdfOp(invert_bytes, dtypes.BYTES_DTYPE).as_expr(
314+
expr.inputs[0]
315+
)
316+
return expr
317+
318+
291319
def _coerce_comparables(
292320
expr1: expression.Expression,
293321
expr2: expression.Expression,
@@ -385,6 +413,7 @@ def _lower_cast(cast_op: ops.AsTypeOp, arg: expression.Expression):
385413
LowerFloorDivRule(),
386414
LowerModRule(),
387415
LowerAsTypeRule(),
416+
LowerInvertOp(),
388417
)
389418

390419

bigframes/core/compile/polars/operations/generic_ops.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,14 @@ def isnull_op_impl(
4545
input: pl.Expr,
4646
) -> pl.Expr:
4747
return input.is_null()
48+
49+
50+
@polars_compiler.register_op(generic_ops.PyUdfOp)
51+
def py_udf_op_impl(
52+
compiler: polars_compiler.PolarsExpressionCompiler,
53+
op: generic_ops.PyUdfOp, # type: ignore
54+
input: pl.Expr,
55+
) -> pl.Expr:
56+
return input.map_elements(
57+
op.fn, return_dtype=polars_compiler._DTYPE_MAPPING[op._output_type]
58+
)

bigframes/operations/generic_ops.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,3 +446,15 @@ class SqlScalarOp(base_ops.NaryOp):
446446

447447
def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
448448
return self._output_type
449+
450+
451+
@dataclasses.dataclass(frozen=True)
452+
class PyUdfOp(base_ops.NaryOp):
453+
"""Represents a local UDF."""
454+
455+
name: typing.ClassVar[str] = "py_udf"
456+
fn: typing.Callable
457+
_output_type: dtypes.ExpressionType
458+
459+
def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
460+
return self._output_type

bigframes/session/polars_executor.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@
5858
numeric_ops.FloorDivOp,
5959
numeric_ops.ModOp,
6060
generic_ops.AsTypeOp,
61+
generic_ops.WhereOp,
62+
generic_ops.CoalesceOp,
63+
generic_ops.FillNaOp,
64+
generic_ops.CaseWhenOp,
65+
generic_ops.InvertOp,
6166
)
6267
_COMPATIBLE_AGG_OPS = (
6368
agg_ops.SizeOp,

bigframes/version.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
__version__ = "2.14.0"
15+
__version__ = "2.15.0"
1616

1717
# {x-release-please-start-date}
18-
__release_date__ = "2025-08-05"
18+
__release_date__ = "2025-08-11"
1919
# {x-release-please-end}

tests/system/small/engines/test_generic_ops.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def test_engines_astype_int(scalars_array_value: array_value.ArrayValue, engine)
5959
ops.AsTypeOp(to_type=bigframes.dtypes.INT_DTYPE),
6060
excluded_cols=["string_col"],
6161
)
62+
6263
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
6364

6465

@@ -73,6 +74,7 @@ def test_engines_astype_string_int(scalars_array_value: array_value.ArrayValue,
7374
for val in vals
7475
]
7576
)
77+
7678
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
7779

7880

@@ -83,6 +85,7 @@ def test_engines_astype_float(scalars_array_value: array_value.ArrayValue, engin
8385
ops.AsTypeOp(to_type=bigframes.dtypes.FLOAT_DTYPE),
8486
excluded_cols=["string_col"],
8587
)
88+
8689
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
8790

8891

@@ -99,6 +102,7 @@ def test_engines_astype_string_float(
99102
for val in vals
100103
]
101104
)
105+
102106
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
103107

104108

@@ -107,6 +111,7 @@ def test_engines_astype_bool(scalars_array_value: array_value.ArrayValue, engine
107111
arr = apply_op(
108112
scalars_array_value, ops.AsTypeOp(to_type=bigframes.dtypes.BOOL_DTYPE)
109113
)
114+
110115
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
111116

112117

@@ -118,6 +123,7 @@ def test_engines_astype_string(scalars_array_value: array_value.ArrayValue, engi
118123
ops.AsTypeOp(to_type=bigframes.dtypes.STRING_DTYPE),
119124
excluded_cols=["float64_col"],
120125
)
126+
121127
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
122128

123129

@@ -128,6 +134,7 @@ def test_engines_astype_numeric(scalars_array_value: array_value.ArrayValue, eng
128134
ops.AsTypeOp(to_type=bigframes.dtypes.NUMERIC_DTYPE),
129135
excluded_cols=["string_col"],
130136
)
137+
131138
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
132139

133140

@@ -144,6 +151,7 @@ def test_engines_astype_string_numeric(
144151
for val in vals
145152
]
146153
)
154+
147155
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
148156

149157

@@ -154,6 +162,7 @@ def test_engines_astype_date(scalars_array_value: array_value.ArrayValue, engine
154162
ops.AsTypeOp(to_type=bigframes.dtypes.DATE_DTYPE),
155163
excluded_cols=["string_col"],
156164
)
165+
157166
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
158167

159168

@@ -170,6 +179,7 @@ def test_engines_astype_string_date(
170179
for val in vals
171180
]
172181
)
182+
173183
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
174184

175185

@@ -180,6 +190,7 @@ def test_engines_astype_datetime(scalars_array_value: array_value.ArrayValue, en
180190
ops.AsTypeOp(to_type=bigframes.dtypes.DATETIME_DTYPE),
181191
excluded_cols=["string_col"],
182192
)
193+
183194
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
184195

185196

@@ -196,6 +207,7 @@ def test_engines_astype_string_datetime(
196207
for val in vals
197208
]
198209
)
210+
199211
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
200212

201213

@@ -206,6 +218,7 @@ def test_engines_astype_timestamp(scalars_array_value: array_value.ArrayValue, e
206218
ops.AsTypeOp(to_type=bigframes.dtypes.TIMESTAMP_DTYPE),
207219
excluded_cols=["string_col"],
208220
)
221+
209222
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
210223

211224

@@ -226,6 +239,7 @@ def test_engines_astype_string_timestamp(
226239
for val in vals
227240
]
228241
)
242+
229243
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
230244

231245

@@ -236,6 +250,7 @@ def test_engines_astype_time(scalars_array_value: array_value.ArrayValue, engine
236250
ops.AsTypeOp(to_type=bigframes.dtypes.TIME_DTYPE),
237251
excluded_cols=["string_col", "int64_col", "int64_too"],
238252
)
253+
239254
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
240255

241256

@@ -256,6 +271,7 @@ def test_engines_astype_from_json(scalars_array_value: array_value.ArrayValue, e
256271
),
257272
]
258273
arr, _ = scalars_array_value.compute_values(exprs)
274+
259275
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
260276

261277

@@ -265,4 +281,112 @@ def test_engines_astype_timedelta(scalars_array_value: array_value.ArrayValue, e
265281
scalars_array_value,
266282
ops.AsTypeOp(to_type=bigframes.dtypes.TIMEDELTA_DTYPE),
267283
)
284+
285+
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
286+
287+
288+
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
289+
def test_engines_where_op(scalars_array_value: array_value.ArrayValue, engine):
290+
arr, _ = scalars_array_value.compute_values(
291+
[
292+
ops.where_op.as_expr(
293+
expression.deref("int64_col"),
294+
expression.deref("bool_col"),
295+
expression.deref("float64_col"),
296+
)
297+
]
298+
)
299+
300+
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
301+
302+
303+
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
304+
def test_engines_coalesce_op(scalars_array_value: array_value.ArrayValue, engine):
305+
arr, _ = scalars_array_value.compute_values(
306+
[
307+
ops.coalesce_op.as_expr(
308+
expression.deref("int64_col"),
309+
expression.deref("float64_col"),
310+
)
311+
]
312+
)
313+
314+
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
315+
316+
317+
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
318+
def test_engines_fillna_op(scalars_array_value: array_value.ArrayValue, engine):
319+
arr, _ = scalars_array_value.compute_values(
320+
[
321+
ops.fillna_op.as_expr(
322+
expression.deref("int64_col"),
323+
expression.deref("float64_col"),
324+
)
325+
]
326+
)
327+
328+
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
329+
330+
331+
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
332+
def test_engines_casewhen_op_single_case(
333+
scalars_array_value: array_value.ArrayValue, engine
334+
):
335+
arr, _ = scalars_array_value.compute_values(
336+
[
337+
ops.case_when_op.as_expr(
338+
expression.deref("bool_col"),
339+
expression.deref("int64_col"),
340+
)
341+
]
342+
)
343+
344+
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
345+
346+
347+
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
348+
def test_engines_casewhen_op_double_case(
349+
scalars_array_value: array_value.ArrayValue, engine
350+
):
351+
arr, _ = scalars_array_value.compute_values(
352+
[
353+
ops.case_when_op.as_expr(
354+
ops.gt_op.as_expr(expression.deref("int64_col"), expression.const(3)),
355+
expression.deref("int64_col"),
356+
ops.lt_op.as_expr(expression.deref("int64_col"), expression.const(-3)),
357+
expression.deref("int64_too"),
358+
)
359+
]
360+
)
361+
362+
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
363+
364+
365+
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
366+
def test_engines_isnull_op(scalars_array_value: array_value.ArrayValue, engine):
367+
arr, _ = scalars_array_value.compute_values(
368+
[ops.isnull_op.as_expr(expression.deref("string_col"))]
369+
)
370+
371+
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
372+
373+
374+
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
375+
def test_engines_notnull_op(scalars_array_value: array_value.ArrayValue, engine):
376+
arr, _ = scalars_array_value.compute_values(
377+
[ops.notnull_op.as_expr(expression.deref("string_col"))]
378+
)
379+
380+
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
381+
382+
383+
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
384+
def test_engines_invert_op(scalars_array_value: array_value.ArrayValue, engine):
385+
arr, _ = scalars_array_value.compute_values(
386+
[
387+
ops.invert_op.as_expr(expression.deref("bytes_col")),
388+
ops.invert_op.as_expr(expression.deref("bool_col")),
389+
]
390+
)
391+
268392
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)

0 commit comments

Comments
 (0)