From eab1633b85f5e15db83b32c0186c35925aa7980d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Wed, 3 Jun 2026 16:53:24 +0000 Subject: [PATCH 01/15] fix(bigframes): include pyopenssl as a dependency Also, support pandas 3.0 in various system tests. --- packages/bigframes/setup.py | 2 +- .../small/functions/test_remote_function.py | 23 ++++++++++++++++--- .../tests/system/small/test_magics.py | 4 ++-- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/packages/bigframes/setup.py b/packages/bigframes/setup.py index 819f8489e36e..5e5a3c5ffa07 100644 --- a/packages/bigframes/setup.py +++ b/packages/bigframes/setup.py @@ -38,7 +38,7 @@ "fsspec >=2023.3.0", "gcsfs >=2023.3.0, !=2025.5.0, !=2026.2.0, !=2026.3.0", "geopandas >=0.12.2", - "google-auth >=2.15.0,<3.0", + "google-auth[pyopenssl] >=2.15.0,<3.0", "google-cloud-bigquery[bqstorage,pandas] >=3.36.0", # 2.30 needed for arrow support. "google-cloud-bigquery-storage >= 2.30.0, < 3.0.0", diff --git a/packages/bigframes/tests/system/small/functions/test_remote_function.py b/packages/bigframes/tests/system/small/functions/test_remote_function.py index a970fab64db3..74e667c76fd4 100644 --- a/packages/bigframes/tests/system/small/functions/test_remote_function.py +++ b/packages/bigframes/tests/system/small/functions/test_remote_function.py @@ -468,7 +468,12 @@ def add_one(x): pd_int64_df = scalars_pandas_df[int64_cols] pd_int64_df_filtered = pd_int64_df.dropna() - pd_result = pd_int64_df_filtered.applymap(add_one) + + # TODO(swast): Remove when pandas 2.1.x+ is the minimum supported. + if hasattr(pd_int64_df_filtered, "applymap"): + pd_result = pd_int64_df_filtered.applymap(add_one) + else: + pd_result = pd_int64_df_filtered.map(add_one) # TODO(shobs): Figure why pandas .applymap() changes the dtype, i.e. # pd_int64_df_filtered.dtype is Int64Dtype() # pd_int64_df_filtered.applymap(lambda x: x).dtype is int64. @@ -503,7 +508,13 @@ def add_one(x): pd_int64_df = scalars_pandas_df[int64_cols] pd_int64_df_filtered = pd_int64_df[pd_int64_df["int64_col"].notnull()] - pd_result = pd_int64_df_filtered.applymap(add_one) + + # TODO(swast): Remove when pandas 2.1.x+ is the minimum supported. + if hasattr(pd_int64_df_filtered, "applymap"): + pd_result = pd_int64_df_filtered.applymap(add_one) + else: + pd_result = pd_int64_df_filtered.map(add_one) + # TODO(shobs): Figure why pandas .applymap() changes the dtype, i.e. # pd_int64_df_filtered.dtype is Int64Dtype() # pd_int64_df_filtered.applymap(lambda x: x).dtype is int64. @@ -536,7 +547,13 @@ def add_one(x): bf_result = bf_int64_df.applymap(remote_add_one, na_action="ignore").to_pandas() pd_int64_df = scalars_pandas_df[int64_cols] - pd_result = pd_int64_df.applymap(add_one, na_action="ignore") + + # TODO(swast): Remove when pandas 2.1.x+ is the minimum supported. + if hasattr(pd_int64_df, "applymap"): + pd_result = pd_int64_df.applymap(add_one, na_action="ignore") + else: + pd_result = pd_int64_df.map(add_one, na_action="ignore") + # TODO(shobs): Figure why pandas .applymap() changes the dtype, i.e. # pd_int64_df_filtered.dtype is Int64Dtype() # pd_int64_df_filtered.applymap(lambda x: x).dtype is int64. diff --git a/packages/bigframes/tests/system/small/test_magics.py b/packages/bigframes/tests/system/small/test_magics.py index 91ada5b9e34a..eac0f233f98e 100644 --- a/packages/bigframes/tests/system/small/test_magics.py +++ b/packages/bigframes/tests/system/small/test_magics.py @@ -44,7 +44,7 @@ def test_magic_select_lit_to_var(ip): assert "dst_var" in ip.user_ns result_df = ip.user_ns["dst_var"] assert result_df.shape == (1, 1) - assert result_df.loc[0, 0] == 3 + assert result_df.to_pandas().iloc[0, 0] == 3 def test_magic_select_lit_dry_run(ip): @@ -97,4 +97,4 @@ def test_magic_select_interpolate(ip): assert "dst_var" in ip.user_ns result_df = ip.user_ns["dst_var"] assert result_df.shape == (1, 1) - assert result_df.loc[0, 0] == 9 + assert result_df.loc[0, "total"] == 9 From 01afa1263864f9c991abc7991347f63990d198b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 3 Jun 2026 17:03:12 +0000 Subject: [PATCH 02/15] Apply suggestions from code review Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- .../small/functions/test_remote_function.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/packages/bigframes/tests/system/small/functions/test_remote_function.py b/packages/bigframes/tests/system/small/functions/test_remote_function.py index 74e667c76fd4..869b26ca38c5 100644 --- a/packages/bigframes/tests/system/small/functions/test_remote_function.py +++ b/packages/bigframes/tests/system/small/functions/test_remote_function.py @@ -470,10 +470,10 @@ def add_one(x): pd_int64_df_filtered = pd_int64_df.dropna() # TODO(swast): Remove when pandas 2.1.x+ is the minimum supported. - if hasattr(pd_int64_df_filtered, "applymap"): - pd_result = pd_int64_df_filtered.applymap(add_one) - else: + if hasattr(pd_int64_df_filtered, "map"): pd_result = pd_int64_df_filtered.map(add_one) + else: + pd_result = pd_int64_df_filtered.applymap(add_one) # TODO(shobs): Figure why pandas .applymap() changes the dtype, i.e. # pd_int64_df_filtered.dtype is Int64Dtype() # pd_int64_df_filtered.applymap(lambda x: x).dtype is int64. @@ -510,10 +510,10 @@ def add_one(x): pd_int64_df_filtered = pd_int64_df[pd_int64_df["int64_col"].notnull()] # TODO(swast): Remove when pandas 2.1.x+ is the minimum supported. - if hasattr(pd_int64_df_filtered, "applymap"): - pd_result = pd_int64_df_filtered.applymap(add_one) - else: + if hasattr(pd_int64_df_filtered, "map"): pd_result = pd_int64_df_filtered.map(add_one) + else: + pd_result = pd_int64_df_filtered.applymap(add_one) # TODO(shobs): Figure why pandas .applymap() changes the dtype, i.e. # pd_int64_df_filtered.dtype is Int64Dtype() @@ -549,10 +549,10 @@ def add_one(x): pd_int64_df = scalars_pandas_df[int64_cols] # TODO(swast): Remove when pandas 2.1.x+ is the minimum supported. - if hasattr(pd_int64_df, "applymap"): - pd_result = pd_int64_df.applymap(add_one, na_action="ignore") - else: + if hasattr(pd_int64_df, "map"): pd_result = pd_int64_df.map(add_one, na_action="ignore") + else: + pd_result = pd_int64_df.applymap(add_one, na_action="ignore") # TODO(shobs): Figure why pandas .applymap() changes the dtype, i.e. # pd_int64_df_filtered.dtype is Int64Dtype() From 478090f38e4b11541e700383baf88fc0ee9dbbdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Wed, 3 Jun 2026 18:07:11 +0000 Subject: [PATCH 03/15] fix unit tests --- packages/bigframes/bigframes/testing/utils.py | 5 ++ .../test_bigframes_sql_scalar/out.sql | 4 +- .../test_sql_scalar/out.sql | 4 +- .../core/test_dataframe_accessor.py | 74 +++++++++---------- packages/bigframes/tests/unit/test_col.py | 4 +- 5 files changed, 48 insertions(+), 43 deletions(-) diff --git a/packages/bigframes/bigframes/testing/utils.py b/packages/bigframes/bigframes/testing/utils.py index b3b8ba1ab921..6be74115341c 100644 --- a/packages/bigframes/bigframes/testing/utils.py +++ b/packages/bigframes/bigframes/testing/utils.py @@ -93,6 +93,11 @@ def assert_series_equivalent(pd_series: pd.Series, bf_series: bpd.Series, **kwar def _normalize_all_nulls(col: pd.Series) -> pd.Series: if pd_types.is_float_dtype(col.dtype): col = col.astype("float64").astype("Float64") + elif col.dtype == "object": + try: + col = col.astype("Float64") + except (TypeError, ValueError, SystemError): + pass return col diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_bigframes_sql_scalar/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_bigframes_sql_scalar/out.sql index 14853067c700..2c3f8c230edd 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_bigframes_sql_scalar/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_bigframes_sql_scalar/out.sql @@ -1,4 +1,4 @@ SELECT `rowindex`, - ROUND(`int64_col` + `int64_too`) AS `0` -FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file + ROUND(`int64_col` + `int64_too`) AS `bigframes_unnamed_column` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_sql_scalar/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_sql_scalar/out.sql index 14853067c700..2c3f8c230edd 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_sql_scalar/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_sql_scalar/out.sql @@ -1,4 +1,4 @@ SELECT `rowindex`, - ROUND(`int64_col` + `int64_too`) AS `0` -FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file + ROUND(`int64_col` + `int64_too`) AS `bigframes_unnamed_column` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` diff --git a/packages/bigframes/tests/unit/extensions/core/test_dataframe_accessor.py b/packages/bigframes/tests/unit/extensions/core/test_dataframe_accessor.py index 914a448700f4..7ab4f5176980 100644 --- a/packages/bigframes/tests/unit/extensions/core/test_dataframe_accessor.py +++ b/packages/bigframes/tests/unit/extensions/core/test_dataframe_accessor.py @@ -96,16 +96,16 @@ def mock_generate(prompt, **kwargs): output_schema={"res": "STRING"}, ) - assert result == ( - df["text_input"], - { - "connection_id": "conn", - "endpoint": "endpoint", - "request_type": "dedicated", - "model_params": {"temp": 0.5}, - "output_schema": {"res": "STRING"}, - }, - ) + assert isinstance(result, tuple) + assert len(result) == 2 + pd.testing.assert_series_equal(result[0], df["text_input"]) + assert result[1] == { + "connection_id": "conn", + "endpoint": "endpoint", + "request_type": "dedicated", + "model_params": {"temp": 0.5}, + "output_schema": {"res": "STRING"}, + } def test_bigframes_ai_generate(scalar_types_df: bpd.DataFrame, monkeypatch): @@ -147,15 +147,15 @@ def mock_generate_bool(prompt, **kwargs): model_params={"temp": 0.5}, ) - assert result == ( - df["text_input"], - { - "connection_id": "conn", - "endpoint": "endpoint", - "request_type": "dedicated", - "model_params": {"temp": 0.5}, - }, - ) + assert isinstance(result, tuple) + assert len(result) == 2 + pd.testing.assert_series_equal(result[0], df["text_input"]) + assert result[1] == { + "connection_id": "conn", + "endpoint": "endpoint", + "request_type": "dedicated", + "model_params": {"temp": 0.5}, + } def test_bigframes_ai_generate_bool(scalar_types_df: bpd.DataFrame, monkeypatch): @@ -196,15 +196,15 @@ def mock_generate_int(prompt, **kwargs): model_params={"temp": 0.5}, ) - assert result == ( - df["text_input"], - { - "connection_id": "conn", - "endpoint": "endpoint", - "request_type": "dedicated", - "model_params": {"temp": 0.5}, - }, - ) + assert isinstance(result, tuple) + assert len(result) == 2 + pd.testing.assert_series_equal(result[0], df["text_input"]) + assert result[1] == { + "connection_id": "conn", + "endpoint": "endpoint", + "request_type": "dedicated", + "model_params": {"temp": 0.5}, + } def test_bigframes_ai_generate_int(scalar_types_df: bpd.DataFrame, monkeypatch): @@ -245,15 +245,15 @@ def mock_generate_double(prompt, **kwargs): model_params={"temp": 0.5}, ) - assert result == ( - df["text_input"], - { - "connection_id": "conn", - "endpoint": "endpoint", - "request_type": "dedicated", - "model_params": {"temp": 0.5}, - }, - ) + assert isinstance(result, tuple) + assert len(result) == 2 + pd.testing.assert_series_equal(result[0], df["text_input"]) + assert result[1] == { + "connection_id": "conn", + "endpoint": "endpoint", + "request_type": "dedicated", + "model_params": {"temp": 0.5}, + } def test_bigframes_ai_generate_double(scalar_types_df: bpd.DataFrame, monkeypatch): diff --git a/packages/bigframes/tests/unit/test_col.py b/packages/bigframes/tests/unit/test_col.py index 9f5bbca5d9bc..c8caf9136c0a 100644 --- a/packages/bigframes/tests/unit/test_col.py +++ b/packages/bigframes/tests/unit/test_col.py @@ -88,10 +88,10 @@ def scalars_dfs( def test_pd_col_unary_operators(scalars_dfs, op): scalars_df, scalars_pandas_df = scalars_dfs bf_kwargs = { - "result": op(bpd.col("float64_col")), + "result": op(bpd.col("bool_col")), } pd_kwargs = { - "result": op(pd.col("float64_col")), # type: ignore + "result": op(pd.col("bool_col")), # type: ignore } df = scalars_df.assign(**bf_kwargs) From 7aacf90fd7fc587bfa068841eb90a2b6d779a0fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Wed, 3 Jun 2026 18:17:37 +0000 Subject: [PATCH 04/15] robust to newlines --- .../unit/core/compile/sqlglot/test_dataframe_accessor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/test_dataframe_accessor.py b/packages/bigframes/tests/unit/core/compile/sqlglot/test_dataframe_accessor.py index 26e4d1788059..c87e009688ff 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/test_dataframe_accessor.py +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/test_dataframe_accessor.py @@ -42,7 +42,7 @@ def to_pandas(series, *, ordered): ) session.read_pandas.assert_called_once() - snapshot.assert_match(result, "out.sql") + snapshot.assert_match(result.strip() + "\n", "out.sql") def test_bigframes_sql_scalar(scalar_types_df: bpd.DataFrame, snapshot): @@ -57,4 +57,4 @@ def test_bigframes_sql_scalar(scalar_types_df: bpd.DataFrame, snapshot): session.read_pandas.assert_not_called() # Bigframes implementation returns a bigframes.series.Series sql, _, _ = result.to_frame()._to_sql_query(include_index=True) - snapshot.assert_match(sql, "out.sql") + snapshot.assert_match(sql.strip() + "\n", "out.sql") From addc623d89d1eaf0dc07eca98412d3235447e0c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Wed, 3 Jun 2026 18:31:16 +0000 Subject: [PATCH 05/15] fix doctests --- packages/bigframes/bigframes/core/blocks.py | 6 ++++++ .../third_party/bigframes_vendored/pandas/core/frame.py | 9 +++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/packages/bigframes/bigframes/core/blocks.py b/packages/bigframes/bigframes/core/blocks.py index 33f5aaab5c7d..0ab5f6729eed 100644 --- a/packages/bigframes/bigframes/core/blocks.py +++ b/packages/bigframes/bigframes/core/blocks.py @@ -1989,6 +1989,10 @@ def _generate_resample_label( ) level = level or 0 col_id = self.index.resolve_level(level)[0] + if isinstance(level, int): + resample_label = self.index.names[level] + else: + resample_label = level # Reset index to make the resampling level a column, then drop all other index columns. # This simplifies processing by focusing solely on the column required for resampling. block = self.reset_index(drop=False) @@ -2007,6 +2011,7 @@ def _generate_resample_label( raise KeyError(f"The grouper name {on} is not found") col_id = matches[0] + resample_label = on block = self if level is None: dtype = self._column_type(col_id) @@ -2099,6 +2104,7 @@ def _generate_resample_label( block.value_columns[0], block.value_columns[1], op=ops.IntegerLabelToDatetimeOp(freq=freq, label=label, origin=origin), + result_label=resample_label, ) # After multiple merges, the columns: diff --git a/packages/bigframes/third_party/bigframes_vendored/pandas/core/frame.py b/packages/bigframes/third_party/bigframes_vendored/pandas/core/frame.py index 678fb5f65177..81380aff10c8 100644 --- a/packages/bigframes/third_party/bigframes_vendored/pandas/core/frame.py +++ b/packages/bigframes/third_party/bigframes_vendored/pandas/core/frame.py @@ -66,7 +66,7 @@ def axes(self) -> list: >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> df.axes[1:] - [Index(['col1', 'col2'], dtype='object')] + [Index(['col1', 'col2'], dtype='str')] """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1963,7 +1963,7 @@ def keys(self): ... 'B': [4, 5, 6], ... }) >>> df.keys() - Index(['A', 'B'], dtype='object') + Index(['A', 'B'], dtype='str') Returns: pandas.Index: Info axis. @@ -4819,7 +4819,8 @@ def resample( >>> df = bpd.DataFrame(data).set_index("timestamp_col") >>> df.resample(rule="7s").min() - int64_col int64_too + int64_col int64_too + timestamp_col 2021-01-01 12:59:55 0 10 2021-01-01 13:00:02 2 12 2021-01-01 13:00:09 9 19 @@ -6633,7 +6634,7 @@ def columns(self): [3 rows x 3 columns] >>> df.columns - Index(['Name', 'Age', 'Location'], dtype='object') + Index(['Name', 'Age', 'Location'], dtype='str') You can also set new labels for columns. From 13549bcca2e5d0a7506cc4c5a1fa90991dd33e5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Wed, 3 Jun 2026 18:34:22 +0000 Subject: [PATCH 06/15] standardize pandas version in sql scalar tests --- .../unit/core/compile/sqlglot/test_dataframe_accessor.py | 4 ++++ .../third_party/bigframes_vendored/pandas/core/frame.py | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/test_dataframe_accessor.py b/packages/bigframes/tests/unit/core/compile/sqlglot/test_dataframe_accessor.py index c87e009688ff..e430f5664975 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/test_dataframe_accessor.py +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/test_dataframe_accessor.py @@ -22,6 +22,10 @@ pytest.importorskip("pytest_snapshot") +# Only test on the latest pandas since column naming behavior is slightly +# different across versions, e.g. unnamed vs 0 for unnamed Series. +pytest.importorskip("pandas", minversion="3.0.0") + def test_sql_scalar(scalar_types_df: bpd.DataFrame, snapshot, monkeypatch): session = mock.create_autospec(bigframes.session.Session) diff --git a/packages/bigframes/third_party/bigframes_vendored/pandas/core/frame.py b/packages/bigframes/third_party/bigframes_vendored/pandas/core/frame.py index 81380aff10c8..f016cab47ae3 100644 --- a/packages/bigframes/third_party/bigframes_vendored/pandas/core/frame.py +++ b/packages/bigframes/third_party/bigframes_vendored/pandas/core/frame.py @@ -4833,7 +4833,8 @@ def resample( >>> df = bpd.DataFrame(data) >>> df.resample(rule="7s", on = "timestamp_col", origin="start").min() - int64_col int64_too + int64_col int64_too + timestamp_col 2021-01-01 13:00:00 0 10 2021-01-01 13:00:07 7 17 2021-01-01 13:00:14 14 24 @@ -6647,7 +6648,7 @@ def columns(self): [3 rows x 3 columns] >>> df.columns - Index(['NewName', 'NewAge', 'NewLocation'], dtype='object') + Index(['NewName', 'NewAge', 'NewLocation'], dtype='str') """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From 5335a7cf6cba3ecebc34653782170ba06b6f9cb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Wed, 3 Jun 2026 18:43:50 +0000 Subject: [PATCH 07/15] snapshot update --- .../test_compile_fromrange/test_compile_fromrange/out.sql | 4 ++-- .../tests/unit/core/compile/sqlglot/test_compile_fromrange.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_fromrange/test_compile_fromrange/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_fromrange/test_compile_fromrange/out.sql index 0b0e07056ab4..4f4e2496498f 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_fromrange/test_compile_fromrange/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_fromrange/test_compile_fromrange/out.sql @@ -60,7 +60,7 @@ WITH `bfcte_0` AS ( SELECT CAST(TIMESTAMP_MICROS( CAST(CAST(`bfcol_17` AS BIGNUMERIC) * 7000000 + CAST(UNIX_MICROS(CAST(CAST(`bfcol_8` AS DATE) AS TIMESTAMP)) AS BIGNUMERIC) AS INT64) - ) AS DATETIME) AS `bigframes_unnamed_index`, + ) AS DATETIME) AS `timestamp_col`, `bfcol_11` AS `int64_col`, `bfcol_12` AS `int64_too` FROM ( @@ -72,4 +72,4 @@ FROM ( LEFT JOIN `bfcte_5` ON `bfcol_17` = `bfcol_13` ORDER BY - `bfcol_17` ASC NULLS LAST \ No newline at end of file + `bfcol_17` ASC NULLS LAST diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_fromrange.py b/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_fromrange.py index ba2e2075517b..8c25ca0310cd 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_fromrange.py +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_fromrange.py @@ -32,4 +32,4 @@ def test_compile_fromrange(compiler_session, snapshot): sql, _, _ = df.resample(rule="7s")._block.to_sql_query( include_index=True, enable_cache=False ) - snapshot.assert_match(sql, "out.sql") + snapshot.assert_match(sql.strip() + "\n", "out.sql") From 7851eabb3ad5fab83984410900116da0a1bffb83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Wed, 3 Jun 2026 18:52:06 +0000 Subject: [PATCH 08/15] workaround for polars bug --- .../bigframes/core/compile/polars/compiler.py | 47 ++++++++++++++++++- .../bigframes_vendored/pandas/core/generic.py | 6 +-- 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/packages/bigframes/bigframes/core/compile/polars/compiler.py b/packages/bigframes/bigframes/core/compile/polars/compiler.py index dac78f5c7b89..7a243f2ee4ed 100644 --- a/packages/bigframes/bigframes/core/compile/polars/compiler.py +++ b/packages/bigframes/bigframes/core/compile/polars/compiler.py @@ -178,8 +178,53 @@ def _( self, expression: ex.OpExpression, ) -> pl.Expr: - # TODO: Complete the implementation + import datetime + + import pyarrow as pa + op = expression.op + + # Workaround for Polars panic on nulls in timezone-aware datetimes for certain ops. + is_problematic_op = type(op) in ( + date_ops.YearOp, + date_ops.QuarterOp, + date_ops.MonthOp, + date_ops.DayOp, + date_ops.IsoWeekOp, + ) + + if is_problematic_op and len(expression.inputs) == 1: + input_expr = expression.inputs[0] + if ( + input_expr.is_resolved + and isinstance(input_expr.output_type, pd.ArrowDtype) + and isinstance(input_expr.output_type.pyarrow_dtype, pa.TimestampType) + and input_expr.output_type.pyarrow_dtype.tz is not None + ): + tz_str = input_expr.output_type.pyarrow_dtype.tz + if tz_str == "UTC": + dummy_tz = datetime.timezone.utc + else: + try: + from zoneinfo import ZoneInfo + + dummy_tz = ZoneInfo(tz_str) + except Exception: + dummy_tz = datetime.timezone.utc + + dummy_val = datetime.datetime(1970, 1, 1, tzinfo=dummy_tz) + + compiled_input = self.compile_expression(input_expr) + filled_input = compiled_input.fill_null(dummy_val) + compiled_op_with_fill = self.compile_op(op, filled_input) + + return ( + pl.when(compiled_input.is_null()) + .then(None) + .otherwise(compiled_op_with_fill) + ) + + # TODO: Complete the implementation args = tuple(map(self.compile_expression, expression.inputs)) return self.compile_op(op, *args) diff --git a/packages/bigframes/third_party/bigframes_vendored/pandas/core/generic.py b/packages/bigframes/third_party/bigframes_vendored/pandas/core/generic.py index a5a3e6098376..0e4ac335c8a0 100644 --- a/packages/bigframes/third_party/bigframes_vendored/pandas/core/generic.py +++ b/packages/bigframes/third_party/bigframes_vendored/pandas/core/generic.py @@ -629,9 +629,9 @@ def dtypes(self): >>> df = bpd.DataFrame({'float': [1.0], 'int': [1], 'string': ['foo']}) >>> df.dtypes - float Float64 - int Int64 - string string[pyarrow] + float Float64 + int Int64 + string string dtype: object Returns: From 300352bd2cb3e94439aa0711cb68c5cfdccedb18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Wed, 3 Jun 2026 19:04:43 +0000 Subject: [PATCH 09/15] add todo --- .../bigframes/core/compile/polars/compiler.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/packages/bigframes/bigframes/core/compile/polars/compiler.py b/packages/bigframes/bigframes/core/compile/polars/compiler.py index 7a243f2ee4ed..e690a8a51e5b 100644 --- a/packages/bigframes/bigframes/core/compile/polars/compiler.py +++ b/packages/bigframes/bigframes/core/compile/polars/compiler.py @@ -184,7 +184,11 @@ def _( op = expression.op - # Workaround for Polars panic on nulls in timezone-aware datetimes for certain ops. + # Polars panics on nulls from pandas objects in timezone-aware + # datetimes for certain ops. Convert to timezone-naive temporarily + # to avoid this issue. + # TODO(tswast): Remove workaround when + # https://github.com/pola-rs/polars/issues/27862 has been fixed. is_problematic_op = type(op) in ( date_ops.YearOp, date_ops.QuarterOp, @@ -198,7 +202,9 @@ def _( if ( input_expr.is_resolved and isinstance(input_expr.output_type, pd.ArrowDtype) - and isinstance(input_expr.output_type.pyarrow_dtype, pa.TimestampType) + and isinstance( + input_expr.output_type.pyarrow_dtype, pa.TimestampType + ) and input_expr.output_type.pyarrow_dtype.tz is not None ): tz_str = input_expr.output_type.pyarrow_dtype.tz @@ -212,7 +218,7 @@ def _( except Exception: dummy_tz = datetime.timezone.utc - dummy_val = datetime.datetime(1970, 1, 1, tzinfo=dummy_tz) + dummy_val = datetime.datetime(1970, 1, 1, tzinfo=dummy_tz) # type: ignore compiled_input = self.compile_expression(input_expr) filled_input = compiled_input.fill_null(dummy_val) From b9fb3de1d0ce44c25b52017a4f7c919c01e435bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Wed, 3 Jun 2026 19:12:09 +0000 Subject: [PATCH 10/15] mypy --- packages/bigframes/bigframes/core/compile/polars/compiler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/bigframes/bigframes/core/compile/polars/compiler.py b/packages/bigframes/bigframes/core/compile/polars/compiler.py index e690a8a51e5b..6f24929eeb4e 100644 --- a/packages/bigframes/bigframes/core/compile/polars/compiler.py +++ b/packages/bigframes/bigframes/core/compile/polars/compiler.py @@ -214,11 +214,11 @@ def _( try: from zoneinfo import ZoneInfo - dummy_tz = ZoneInfo(tz_str) + dummy_tz = ZoneInfo(tz_str) # type: ignore except Exception: dummy_tz = datetime.timezone.utc - dummy_val = datetime.datetime(1970, 1, 1, tzinfo=dummy_tz) # type: ignore + dummy_val = datetime.datetime(1970, 1, 1, tzinfo=dummy_tz) compiled_input = self.compile_expression(input_expr) filled_input = compiled_input.fill_null(dummy_val) From 78dadaac27ffaccd2cef63422a8fcb46f6b3b6e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Wed, 3 Jun 2026 20:01:04 +0000 Subject: [PATCH 11/15] fix more doctests --- .../bigframes/bigframes/bigquery/_operations/struct.py | 2 +- packages/bigframes/bigframes/core/indexes/base.py | 1 + packages/bigframes/bigframes/operations/ai.py | 10 ++++------ packages/bigframes/bigframes/series.py | 6 ++++-- .../bigframes_vendored/pandas/core/indexes/accessor.py | 4 ++-- .../bigframes_vendored/pandas/core/series.py | 7 ++++--- 6 files changed, 16 insertions(+), 14 deletions(-) diff --git a/packages/bigframes/bigframes/bigquery/_operations/struct.py b/packages/bigframes/bigframes/bigquery/_operations/struct.py index ba33457a768c..2ee760fb8e54 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/struct.py +++ b/packages/bigframes/bigframes/bigquery/_operations/struct.py @@ -57,5 +57,5 @@ def struct(value: dataframe.DataFrame) -> series.Series: block, result_id = block.apply_nary_op( block.value_columns, ops.StructOp(column_names=tuple(block.column_labels)) ) - block = block.select_column(result_id) + block = block.select_column(result_id).with_column_labels([None]) return series.Series(block) diff --git a/packages/bigframes/bigframes/core/indexes/base.py b/packages/bigframes/bigframes/core/indexes/base.py index 8c418471f6cc..32279d36c9ab 100644 --- a/packages/bigframes/bigframes/core/indexes/base.py +++ b/packages/bigframes/bigframes/core/indexes/base.py @@ -325,6 +325,7 @@ def get_loc(self, key) -> typing.Union[int, slice, "bigframes.series.Series"]: # Return boolean mask for non-monotonic duplicates mask_block = block_with_offsets.select_columns([match_col_id]) mask_block = mask_block.reset_index(drop=True) + mask_block = mask_block.with_column_labels([None]) result_series = bigframes.series.Series(mask_block) return result_series.astype("boolean") diff --git a/packages/bigframes/bigframes/operations/ai.py b/packages/bigframes/bigframes/operations/ai.py index c1c5164e9065..bba0bf5a8362 100644 --- a/packages/bigframes/bigframes/operations/ai.py +++ b/packages/bigframes/bigframes/operations/ai.py @@ -122,12 +122,10 @@ def map( >>> model = llm.GeminiTextGenerator(model_name="gemini-2.5-pro") >>> df = bpd.DataFrame({"ingredient_1": ["Burger Bun", "Soy Bean"], "ingredient_2": ["Beef Patty", "Bittern"]}) - >>> df.ai.map("What is the food made from {ingredient_1} and {ingredient_2}? One word only.", model=model, output_schema={"food": "string"}) - ingredient_1 ingredient_2 food - 0 Burger Bun Beef Patty Burger - - 1 Soy Bean Bittern Tofu - + >>> df.ai.map("What is the food made from {ingredient_1} and {ingredient_2}? One word only.", model=model, output_schema={"food": "string"}) # doctest: +ELLIPSIS + ingredient_1 ingredient_2... + 0 Burger Bun Beef Patty... + 1 Soy Bean Bittern...Tofu [2 rows x 3 columns] diff --git a/packages/bigframes/bigframes/series.py b/packages/bigframes/bigframes/series.py index 0091d0a34b6c..2f13a2b7d1ec 100644 --- a/packages/bigframes/bigframes/series.py +++ b/packages/bigframes/bigframes/series.py @@ -2470,7 +2470,9 @@ def map( self_df = self.to_frame(name="series") result_df = self_df.join(map_df, on="series") - return result_df[self.name] + result = cast(Series, result_df[self.name]) + result.name = self.name + return result @validations.requires_ordering() def sample( @@ -2696,7 +2698,7 @@ def _apply_nary_op( others, ignore_self=ignore_self, cast_scalars=False ) block, result_id = block.project_expr(op.as_expr(*values)) - return Series(block.select_column(result_id)) + return Series(block.select_column(result_id).with_column_labels([None])) def _apply_binary_aggregation( self, other: Series, stat: agg_ops.BinaryAggregateOp diff --git a/packages/bigframes/third_party/bigframes_vendored/pandas/core/indexes/accessor.py b/packages/bigframes/third_party/bigframes_vendored/pandas/core/indexes/accessor.py index a3404c222d49..da5f9e3b88a5 100644 --- a/packages/bigframes/third_party/bigframes_vendored/pandas/core/indexes/accessor.py +++ b/packages/bigframes/third_party/bigframes_vendored/pandas/core/indexes/accessor.py @@ -281,7 +281,7 @@ def month(self): **Examples:** >>> s = bpd.Series( - ... pd.date_range("2000-01-01", periods=3, freq="M") + ... pd.date_range("2000-01-01", periods=3, freq="ME") ... ) >>> s 0 2000-01-31 00:00:00 @@ -404,7 +404,7 @@ def year(self): **Examples:** >>> s = bpd.Series( - ... pd.date_range("2000-01-01", periods=3, freq="Y") + ... pd.date_range("2000-01-01", periods=3, freq="YE") ... ) >>> s 0 2000-12-31 00:00:00 diff --git a/packages/bigframes/third_party/bigframes_vendored/pandas/core/series.py b/packages/bigframes/third_party/bigframes_vendored/pandas/core/series.py index b9cacf3855a2..c116ed640122 100644 --- a/packages/bigframes/third_party/bigframes_vendored/pandas/core/series.py +++ b/packages/bigframes/third_party/bigframes_vendored/pandas/core/series.py @@ -2582,7 +2582,8 @@ def resample( ... } >>> s = bpd.DataFrame(data).set_index("timestamp_col") >>> s.resample(rule="7s", origin="epoch").min() - int64_col + int64_col + timestamp_col 2021-01-01 12:59:56 0 2021-01-01 13:00:03 3 2021-01-01 13:00:10 10 @@ -5674,8 +5675,8 @@ def iloc(self): With a scalar integer. - >>> type(df.iloc[0]) - + >>> type(df.iloc[0]) # doctest: +ELLIPSIS + >>> df.iloc[0] a 1 From c73de14725d6f5c002915c9a30ce2284996ccdc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Wed, 3 Jun 2026 20:09:13 +0000 Subject: [PATCH 12/15] fix system tests --- packages/bigframes/bigframes/testing/utils.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/packages/bigframes/bigframes/testing/utils.py b/packages/bigframes/bigframes/testing/utils.py index 6be74115341c..f0b9b3bb2c9b 100644 --- a/packages/bigframes/bigframes/testing/utils.py +++ b/packages/bigframes/bigframes/testing/utils.py @@ -94,10 +94,13 @@ def _normalize_all_nulls(col: pd.Series) -> pd.Series: if pd_types.is_float_dtype(col.dtype): col = col.astype("float64").astype("Float64") elif col.dtype == "object": - try: - col = col.astype("Float64") - except (TypeError, ValueError, SystemError): + if any(isinstance(x, decimal.Decimal) for x in col): pass + else: + try: + col = col.astype("Float64") + except (TypeError, ValueError, SystemError): + pass return col @@ -122,6 +125,7 @@ def assert_frame_equal( downcast_object: bool = True, **kwargs, ): + if ignore_order: # Sort by a column to get consistent results. if left.index.name != "rowindex": @@ -147,6 +151,7 @@ def assert_frame_equal( left.index = _normalize_index_nulls(left.index) right.index = _normalize_index_nulls(right.index) + pd.testing.assert_frame_equal(left, right, **kwargs) From 74035ce56bf66a74450200f99fb3f410ca63648e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Wed, 3 Jun 2026 20:12:55 +0000 Subject: [PATCH 13/15] snapshot run --- .../test_dataframe_accessor/test_bigframes_sql_scalar/out.sql | 2 +- .../snapshots/test_dataframe_accessor/test_sql_scalar/out.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_bigframes_sql_scalar/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_bigframes_sql_scalar/out.sql index 2c3f8c230edd..80b3137b0b55 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_bigframes_sql_scalar/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_bigframes_sql_scalar/out.sql @@ -1,4 +1,4 @@ SELECT `rowindex`, - ROUND(`int64_col` + `int64_too`) AS `bigframes_unnamed_column` + ROUND(`int64_col` + `int64_too`) AS `0` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_sql_scalar/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_sql_scalar/out.sql index 2c3f8c230edd..80b3137b0b55 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_sql_scalar/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_sql_scalar/out.sql @@ -1,4 +1,4 @@ SELECT `rowindex`, - ROUND(`int64_col` + `int64_too`) AS `bigframes_unnamed_column` + ROUND(`int64_col` + `int64_too`) AS `0` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` From 00946026d51e36e85d243c1e814a827a71e16647 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Wed, 3 Jun 2026 20:15:58 +0000 Subject: [PATCH 14/15] format --- packages/bigframes/bigframes/testing/utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/bigframes/bigframes/testing/utils.py b/packages/bigframes/bigframes/testing/utils.py index f0b9b3bb2c9b..79e99968f583 100644 --- a/packages/bigframes/bigframes/testing/utils.py +++ b/packages/bigframes/bigframes/testing/utils.py @@ -125,7 +125,6 @@ def assert_frame_equal( downcast_object: bool = True, **kwargs, ): - if ignore_order: # Sort by a column to get consistent results. if left.index.name != "rowindex": @@ -151,7 +150,6 @@ def assert_frame_equal( left.index = _normalize_index_nulls(left.index) right.index = _normalize_index_nulls(right.index) - pd.testing.assert_frame_equal(left, right, **kwargs) From 732d5cf1877a01ced6e5e0985fd635096a95a897 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Wed, 3 Jun 2026 20:48:16 +0000 Subject: [PATCH 15/15] more doctest fixes --- .../bigframes/bigquery/_operations/ai.py | 27 ++++++++++--------- packages/bigframes/setup.py | 1 + .../sklearn/decomposition/_mf.py | 2 +- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/packages/bigframes/bigframes/bigquery/_operations/ai.py b/packages/bigframes/bigframes/bigquery/_operations/ai.py index 907d2e462295..78b5d81b6744 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/ai.py +++ b/packages/bigframes/bigframes/bigquery/_operations/ai.py @@ -61,7 +61,7 @@ def generate( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq >>> country = bpd.Series(["Japan", "Canada"]) - >>> bbq.ai.generate(("What's the capital city of ", country, " one word only")) + >>> bbq.ai.generate(("What's the capital city of ", country, " one word only")) # doctest: +ELLIPSIS 0 {'result': 'Tokyo', 'full_response': '{"cand... 1 {'result': 'Ottawa', 'full_response': '{"can... dtype: struct>, status: string>[pyarrow] @@ -231,8 +231,8 @@ def generate_int( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> animal = bpd.Series(["Kangaroo", "Rabbit", "Spider"]) - >>> bbq.ai.generate_int(("How many legs does a ", animal, " have?")) + >>> animal = bpd.Series(["Ostrich", "Rabbit", "Spider"]) + >>> bbq.ai.generate_int(("How many legs does a ", animal, " have?")) # doctest: +ELLIPSIS 0 {'result': 2, 'full_response': '{"candidates":... 1 {'result': 4, 'full_response': '{"candidates":... 2 {'result': 8, 'full_response': '{"candidates":... @@ -305,8 +305,8 @@ def generate_double( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> animal = bpd.Series(["Kangaroo", "Rabbit", "Spider"]) - >>> bbq.ai.generate_double(("How many legs does a ", animal, " have?")) + >>> animal = bpd.Series(["Ostrich", "Rabbit", "Spider"]) + >>> bbq.ai.generate_double(("How many legs does a ", animal, " have?")) # doctest: +ELLIPSIS 0 {'result': 2.0, 'full_response': '{"candidates... 1 {'result': 4.0, 'full_response': '{"candidates... 2 {'result': 8.0, 'full_response': '{"candidates... @@ -383,7 +383,7 @@ def generate_embedding( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq >>> df = bpd.DataFrame({"content": ["apple", "bear", "pear"]}) - >>> bbq.ai.generate_embedding( + >>> bbq.ai.generate_embedding( # doctest: +SKIP ... "project.dataset.model_name", ... df ... ) @@ -486,7 +486,7 @@ def generate_text( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq >>> df = bpd.DataFrame({"prompt": ["write a poem about apples"]}) - >>> bbq.ai.generate_text( + >>> bbq.ai.generate_text( # doctest: +SKIP ... "project.dataset.model_name", ... df ... ) @@ -601,7 +601,7 @@ def generate_table( >>> # the necessary columns for the model's prompt. For example, a >>> # DataFrame with a 'prompt' column for text classification. >>> df = bpd.DataFrame({'prompt': ["some text to classify"]}) - >>> result = bbq.ai.generate_table( + >>> result = bbq.ai.generate_table( # doctest: +SKIP ... "project.dataset.model_name", ... data=df, ... output_schema="category STRING" @@ -708,12 +708,14 @@ def embed( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bbq.ai.embed("dog", endpoint="text-embedding-005") + >>> bbq.ai.embed("dog", endpoint="text-embedding-005") # doctest: +ELLIPSIS 0 {'result': array([ 1.78243860e-03, -1.10658340... + dtype: struct, status: string>[pyarrow] >>> s = bpd.Series(['dog']) - >>> bbq.ai.embed(s, endpoint='text-embedding-005') + >>> bbq.ai.embed(s, endpoint='text-embedding-005') # doctest: +ELLIPSIS 0 {'result': array([ 1.78243860e-03, -1.10658340... + dtype: struct, status: string>[pyarrow] Args: content (str | Series): @@ -1004,6 +1006,7 @@ def similarity( >>> bbq.ai.similarity(df['word'], 'glad', endpoint='text-embedding-005') 0 0.916601 1 0.660579 + Name: word, dtype: Float64 Args: content1 (str | Series): @@ -1082,8 +1085,8 @@ def forecast( >>> df = pd.DataFrame({"value": [1, 2, 3], "time": pd.to_datetime(["2020-01-01", "2020-01-02", "2020-01-03"])}) >>> bpd.options.display.progress_bar = None >>> forecasted_pandas_df = df.bigquery.ai.forecast(data_col="value", timestamp_col="time", horizon=2) - >>> type(forecasted_pandas_df) - + >>> type(forecasted_pandas_df) # doctest: +ELLIPSIS + Forecast using a BigFrames DataFrame: diff --git a/packages/bigframes/setup.py b/packages/bigframes/setup.py index 5e5a3c5ffa07..138c52879526 100644 --- a/packages/bigframes/setup.py +++ b/packages/bigframes/setup.py @@ -75,6 +75,7 @@ "pytest-snapshot", "google-cloud-bigtable >=2.24.0", "google-cloud-pubsub >=2.21.4", + "tzdata", ], # used for local engine "polars": ["polars >= 1.21.0"], diff --git a/packages/bigframes/third_party/bigframes_vendored/sklearn/decomposition/_mf.py b/packages/bigframes/third_party/bigframes_vendored/sklearn/decomposition/_mf.py index 6d5a40714505..0ce79995d0c3 100644 --- a/packages/bigframes/third_party/bigframes_vendored/sklearn/decomposition/_mf.py +++ b/packages/bigframes/third_party/bigframes_vendored/sklearn/decomposition/_mf.py @@ -29,7 +29,7 @@ class MatrixFactorization(BaseEstimator, metaclass=ABCMeta): ... "value": [1, 1, 2, 1, 3, 1.2, 4, 1, 5, 0.8, 6, 1, 2, 3], ... }) >>> model = MatrixFactorization(feedback_type='explicit', num_factors=6, user_col='row', item_col='column', rating_col='value', l2_reg=2.06) - >>> W = model.fit(X) + >>> W = model.fit(X) # doctest: +SKIP Args: feedback_type ('explicit' | 'implicit'):