diff --git a/packages/bigframes/bigframes/bigquery/_operations/ai.py b/packages/bigframes/bigframes/bigquery/_operations/ai.py index 907d2e462295..78b5d81b6744 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/ai.py +++ b/packages/bigframes/bigframes/bigquery/_operations/ai.py @@ -61,7 +61,7 @@ def generate( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq >>> country = bpd.Series(["Japan", "Canada"]) - >>> bbq.ai.generate(("What's the capital city of ", country, " one word only")) + >>> bbq.ai.generate(("What's the capital city of ", country, " one word only")) # doctest: +ELLIPSIS 0 {'result': 'Tokyo', 'full_response': '{"cand... 1 {'result': 'Ottawa', 'full_response': '{"can... dtype: struct>, status: string>[pyarrow] @@ -231,8 +231,8 @@ def generate_int( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> animal = bpd.Series(["Kangaroo", "Rabbit", "Spider"]) - >>> bbq.ai.generate_int(("How many legs does a ", animal, " have?")) + >>> animal = bpd.Series(["Ostrich", "Rabbit", "Spider"]) + >>> bbq.ai.generate_int(("How many legs does a ", animal, " have?")) # doctest: +ELLIPSIS 0 {'result': 2, 'full_response': '{"candidates":... 1 {'result': 4, 'full_response': '{"candidates":... 2 {'result': 8, 'full_response': '{"candidates":... @@ -305,8 +305,8 @@ def generate_double( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> animal = bpd.Series(["Kangaroo", "Rabbit", "Spider"]) - >>> bbq.ai.generate_double(("How many legs does a ", animal, " have?")) + >>> animal = bpd.Series(["Ostrich", "Rabbit", "Spider"]) + >>> bbq.ai.generate_double(("How many legs does a ", animal, " have?")) # doctest: +ELLIPSIS 0 {'result': 2.0, 'full_response': '{"candidates... 1 {'result': 4.0, 'full_response': '{"candidates... 2 {'result': 8.0, 'full_response': '{"candidates... @@ -383,7 +383,7 @@ def generate_embedding( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq >>> df = bpd.DataFrame({"content": ["apple", "bear", "pear"]}) - >>> bbq.ai.generate_embedding( + >>> bbq.ai.generate_embedding( # doctest: +SKIP ... "project.dataset.model_name", ... df ... ) @@ -486,7 +486,7 @@ def generate_text( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq >>> df = bpd.DataFrame({"prompt": ["write a poem about apples"]}) - >>> bbq.ai.generate_text( + >>> bbq.ai.generate_text( # doctest: +SKIP ... "project.dataset.model_name", ... df ... ) @@ -601,7 +601,7 @@ def generate_table( >>> # the necessary columns for the model's prompt. For example, a >>> # DataFrame with a 'prompt' column for text classification. >>> df = bpd.DataFrame({'prompt': ["some text to classify"]}) - >>> result = bbq.ai.generate_table( + >>> result = bbq.ai.generate_table( # doctest: +SKIP ... "project.dataset.model_name", ... data=df, ... output_schema="category STRING" @@ -708,12 +708,14 @@ def embed( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bbq.ai.embed("dog", endpoint="text-embedding-005") + >>> bbq.ai.embed("dog", endpoint="text-embedding-005") # doctest: +ELLIPSIS 0 {'result': array([ 1.78243860e-03, -1.10658340... + dtype: struct, status: string>[pyarrow] >>> s = bpd.Series(['dog']) - >>> bbq.ai.embed(s, endpoint='text-embedding-005') + >>> bbq.ai.embed(s, endpoint='text-embedding-005') # doctest: +ELLIPSIS 0 {'result': array([ 1.78243860e-03, -1.10658340... + dtype: struct, status: string>[pyarrow] Args: content (str | Series): @@ -1004,6 +1006,7 @@ def similarity( >>> bbq.ai.similarity(df['word'], 'glad', endpoint='text-embedding-005') 0 0.916601 1 0.660579 + Name: word, dtype: Float64 Args: content1 (str | Series): @@ -1082,8 +1085,8 @@ def forecast( >>> df = pd.DataFrame({"value": [1, 2, 3], "time": pd.to_datetime(["2020-01-01", "2020-01-02", "2020-01-03"])}) >>> bpd.options.display.progress_bar = None >>> forecasted_pandas_df = df.bigquery.ai.forecast(data_col="value", timestamp_col="time", horizon=2) - >>> type(forecasted_pandas_df) - + >>> type(forecasted_pandas_df) # doctest: +ELLIPSIS + Forecast using a BigFrames DataFrame: diff --git a/packages/bigframes/bigframes/bigquery/_operations/struct.py b/packages/bigframes/bigframes/bigquery/_operations/struct.py index ba33457a768c..2ee760fb8e54 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/struct.py +++ b/packages/bigframes/bigframes/bigquery/_operations/struct.py @@ -57,5 +57,5 @@ def struct(value: dataframe.DataFrame) -> series.Series: block, result_id = block.apply_nary_op( block.value_columns, ops.StructOp(column_names=tuple(block.column_labels)) ) - block = block.select_column(result_id) + block = block.select_column(result_id).with_column_labels([None]) return series.Series(block) diff --git a/packages/bigframes/bigframes/core/blocks.py b/packages/bigframes/bigframes/core/blocks.py index 33f5aaab5c7d..0ab5f6729eed 100644 --- a/packages/bigframes/bigframes/core/blocks.py +++ b/packages/bigframes/bigframes/core/blocks.py @@ -1989,6 +1989,10 @@ def _generate_resample_label( ) level = level or 0 col_id = self.index.resolve_level(level)[0] + if isinstance(level, int): + resample_label = self.index.names[level] + else: + resample_label = level # Reset index to make the resampling level a column, then drop all other index columns. # This simplifies processing by focusing solely on the column required for resampling. block = self.reset_index(drop=False) @@ -2007,6 +2011,7 @@ def _generate_resample_label( raise KeyError(f"The grouper name {on} is not found") col_id = matches[0] + resample_label = on block = self if level is None: dtype = self._column_type(col_id) @@ -2099,6 +2104,7 @@ def _generate_resample_label( block.value_columns[0], block.value_columns[1], op=ops.IntegerLabelToDatetimeOp(freq=freq, label=label, origin=origin), + result_label=resample_label, ) # After multiple merges, the columns: diff --git a/packages/bigframes/bigframes/core/compile/polars/compiler.py b/packages/bigframes/bigframes/core/compile/polars/compiler.py index dac78f5c7b89..6f24929eeb4e 100644 --- a/packages/bigframes/bigframes/core/compile/polars/compiler.py +++ b/packages/bigframes/bigframes/core/compile/polars/compiler.py @@ -178,8 +178,59 @@ def _( self, expression: ex.OpExpression, ) -> pl.Expr: - # TODO: Complete the implementation + import datetime + + import pyarrow as pa + op = expression.op + + # Polars panics on nulls from pandas objects in timezone-aware + # datetimes for certain ops. Convert to timezone-naive temporarily + # to avoid this issue. + # TODO(tswast): Remove workaround when + # https://github.com/pola-rs/polars/issues/27862 has been fixed. + is_problematic_op = type(op) in ( + date_ops.YearOp, + date_ops.QuarterOp, + date_ops.MonthOp, + date_ops.DayOp, + date_ops.IsoWeekOp, + ) + + if is_problematic_op and len(expression.inputs) == 1: + input_expr = expression.inputs[0] + if ( + input_expr.is_resolved + and isinstance(input_expr.output_type, pd.ArrowDtype) + and isinstance( + input_expr.output_type.pyarrow_dtype, pa.TimestampType + ) + and input_expr.output_type.pyarrow_dtype.tz is not None + ): + tz_str = input_expr.output_type.pyarrow_dtype.tz + if tz_str == "UTC": + dummy_tz = datetime.timezone.utc + else: + try: + from zoneinfo import ZoneInfo + + dummy_tz = ZoneInfo(tz_str) # type: ignore + except Exception: + dummy_tz = datetime.timezone.utc + + dummy_val = datetime.datetime(1970, 1, 1, tzinfo=dummy_tz) + + compiled_input = self.compile_expression(input_expr) + filled_input = compiled_input.fill_null(dummy_val) + compiled_op_with_fill = self.compile_op(op, filled_input) + + return ( + pl.when(compiled_input.is_null()) + .then(None) + .otherwise(compiled_op_with_fill) + ) + + # TODO: Complete the implementation args = tuple(map(self.compile_expression, expression.inputs)) return self.compile_op(op, *args) diff --git a/packages/bigframes/bigframes/core/indexes/base.py b/packages/bigframes/bigframes/core/indexes/base.py index 8c418471f6cc..32279d36c9ab 100644 --- a/packages/bigframes/bigframes/core/indexes/base.py +++ b/packages/bigframes/bigframes/core/indexes/base.py @@ -325,6 +325,7 @@ def get_loc(self, key) -> typing.Union[int, slice, "bigframes.series.Series"]: # Return boolean mask for non-monotonic duplicates mask_block = block_with_offsets.select_columns([match_col_id]) mask_block = mask_block.reset_index(drop=True) + mask_block = mask_block.with_column_labels([None]) result_series = bigframes.series.Series(mask_block) return result_series.astype("boolean") diff --git a/packages/bigframes/bigframes/operations/ai.py b/packages/bigframes/bigframes/operations/ai.py index c1c5164e9065..bba0bf5a8362 100644 --- a/packages/bigframes/bigframes/operations/ai.py +++ b/packages/bigframes/bigframes/operations/ai.py @@ -122,12 +122,10 @@ def map( >>> model = llm.GeminiTextGenerator(model_name="gemini-2.5-pro") >>> df = bpd.DataFrame({"ingredient_1": ["Burger Bun", "Soy Bean"], "ingredient_2": ["Beef Patty", "Bittern"]}) - >>> df.ai.map("What is the food made from {ingredient_1} and {ingredient_2}? One word only.", model=model, output_schema={"food": "string"}) - ingredient_1 ingredient_2 food - 0 Burger Bun Beef Patty Burger - - 1 Soy Bean Bittern Tofu - + >>> df.ai.map("What is the food made from {ingredient_1} and {ingredient_2}? One word only.", model=model, output_schema={"food": "string"}) # doctest: +ELLIPSIS + ingredient_1 ingredient_2... + 0 Burger Bun Beef Patty... + 1 Soy Bean Bittern...Tofu [2 rows x 3 columns] diff --git a/packages/bigframes/bigframes/series.py b/packages/bigframes/bigframes/series.py index 0091d0a34b6c..2f13a2b7d1ec 100644 --- a/packages/bigframes/bigframes/series.py +++ b/packages/bigframes/bigframes/series.py @@ -2470,7 +2470,9 @@ def map( self_df = self.to_frame(name="series") result_df = self_df.join(map_df, on="series") - return result_df[self.name] + result = cast(Series, result_df[self.name]) + result.name = self.name + return result @validations.requires_ordering() def sample( @@ -2696,7 +2698,7 @@ def _apply_nary_op( others, ignore_self=ignore_self, cast_scalars=False ) block, result_id = block.project_expr(op.as_expr(*values)) - return Series(block.select_column(result_id)) + return Series(block.select_column(result_id).with_column_labels([None])) def _apply_binary_aggregation( self, other: Series, stat: agg_ops.BinaryAggregateOp diff --git a/packages/bigframes/bigframes/testing/utils.py b/packages/bigframes/bigframes/testing/utils.py index b3b8ba1ab921..79e99968f583 100644 --- a/packages/bigframes/bigframes/testing/utils.py +++ b/packages/bigframes/bigframes/testing/utils.py @@ -93,6 +93,14 @@ def assert_series_equivalent(pd_series: pd.Series, bf_series: bpd.Series, **kwar def _normalize_all_nulls(col: pd.Series) -> pd.Series: if pd_types.is_float_dtype(col.dtype): col = col.astype("float64").astype("Float64") + elif col.dtype == "object": + if any(isinstance(x, decimal.Decimal) for x in col): + pass + else: + try: + col = col.astype("Float64") + except (TypeError, ValueError, SystemError): + pass return col diff --git a/packages/bigframes/setup.py b/packages/bigframes/setup.py index 819f8489e36e..138c52879526 100644 --- a/packages/bigframes/setup.py +++ b/packages/bigframes/setup.py @@ -38,7 +38,7 @@ "fsspec >=2023.3.0", "gcsfs >=2023.3.0, !=2025.5.0, !=2026.2.0, !=2026.3.0", "geopandas >=0.12.2", - "google-auth >=2.15.0,<3.0", + "google-auth[pyopenssl] >=2.15.0,<3.0", "google-cloud-bigquery[bqstorage,pandas] >=3.36.0", # 2.30 needed for arrow support. "google-cloud-bigquery-storage >= 2.30.0, < 3.0.0", @@ -75,6 +75,7 @@ "pytest-snapshot", "google-cloud-bigtable >=2.24.0", "google-cloud-pubsub >=2.21.4", + "tzdata", ], # used for local engine "polars": ["polars >= 1.21.0"], diff --git a/packages/bigframes/tests/system/small/functions/test_remote_function.py b/packages/bigframes/tests/system/small/functions/test_remote_function.py index a970fab64db3..869b26ca38c5 100644 --- a/packages/bigframes/tests/system/small/functions/test_remote_function.py +++ b/packages/bigframes/tests/system/small/functions/test_remote_function.py @@ -468,7 +468,12 @@ def add_one(x): pd_int64_df = scalars_pandas_df[int64_cols] pd_int64_df_filtered = pd_int64_df.dropna() - pd_result = pd_int64_df_filtered.applymap(add_one) + + # TODO(swast): Remove when pandas 2.1.x+ is the minimum supported. + if hasattr(pd_int64_df_filtered, "map"): + pd_result = pd_int64_df_filtered.map(add_one) + else: + pd_result = pd_int64_df_filtered.applymap(add_one) # TODO(shobs): Figure why pandas .applymap() changes the dtype, i.e. # pd_int64_df_filtered.dtype is Int64Dtype() # pd_int64_df_filtered.applymap(lambda x: x).dtype is int64. @@ -503,7 +508,13 @@ def add_one(x): pd_int64_df = scalars_pandas_df[int64_cols] pd_int64_df_filtered = pd_int64_df[pd_int64_df["int64_col"].notnull()] - pd_result = pd_int64_df_filtered.applymap(add_one) + + # TODO(swast): Remove when pandas 2.1.x+ is the minimum supported. + if hasattr(pd_int64_df_filtered, "map"): + pd_result = pd_int64_df_filtered.map(add_one) + else: + pd_result = pd_int64_df_filtered.applymap(add_one) + # TODO(shobs): Figure why pandas .applymap() changes the dtype, i.e. # pd_int64_df_filtered.dtype is Int64Dtype() # pd_int64_df_filtered.applymap(lambda x: x).dtype is int64. @@ -536,7 +547,13 @@ def add_one(x): bf_result = bf_int64_df.applymap(remote_add_one, na_action="ignore").to_pandas() pd_int64_df = scalars_pandas_df[int64_cols] - pd_result = pd_int64_df.applymap(add_one, na_action="ignore") + + # TODO(swast): Remove when pandas 2.1.x+ is the minimum supported. + if hasattr(pd_int64_df, "map"): + pd_result = pd_int64_df.map(add_one, na_action="ignore") + else: + pd_result = pd_int64_df.applymap(add_one, na_action="ignore") + # TODO(shobs): Figure why pandas .applymap() changes the dtype, i.e. # pd_int64_df_filtered.dtype is Int64Dtype() # pd_int64_df_filtered.applymap(lambda x: x).dtype is int64. diff --git a/packages/bigframes/tests/system/small/test_magics.py b/packages/bigframes/tests/system/small/test_magics.py index 91ada5b9e34a..eac0f233f98e 100644 --- a/packages/bigframes/tests/system/small/test_magics.py +++ b/packages/bigframes/tests/system/small/test_magics.py @@ -44,7 +44,7 @@ def test_magic_select_lit_to_var(ip): assert "dst_var" in ip.user_ns result_df = ip.user_ns["dst_var"] assert result_df.shape == (1, 1) - assert result_df.loc[0, 0] == 3 + assert result_df.to_pandas().iloc[0, 0] == 3 def test_magic_select_lit_dry_run(ip): @@ -97,4 +97,4 @@ def test_magic_select_interpolate(ip): assert "dst_var" in ip.user_ns result_df = ip.user_ns["dst_var"] assert result_df.shape == (1, 1) - assert result_df.loc[0, 0] == 9 + assert result_df.loc[0, "total"] == 9 diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_fromrange/test_compile_fromrange/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_fromrange/test_compile_fromrange/out.sql index 0b0e07056ab4..4f4e2496498f 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_fromrange/test_compile_fromrange/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_fromrange/test_compile_fromrange/out.sql @@ -60,7 +60,7 @@ WITH `bfcte_0` AS ( SELECT CAST(TIMESTAMP_MICROS( CAST(CAST(`bfcol_17` AS BIGNUMERIC) * 7000000 + CAST(UNIX_MICROS(CAST(CAST(`bfcol_8` AS DATE) AS TIMESTAMP)) AS BIGNUMERIC) AS INT64) - ) AS DATETIME) AS `bigframes_unnamed_index`, + ) AS DATETIME) AS `timestamp_col`, `bfcol_11` AS `int64_col`, `bfcol_12` AS `int64_too` FROM ( @@ -72,4 +72,4 @@ FROM ( LEFT JOIN `bfcte_5` ON `bfcol_17` = `bfcol_13` ORDER BY - `bfcol_17` ASC NULLS LAST \ No newline at end of file + `bfcol_17` ASC NULLS LAST diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_bigframes_sql_scalar/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_bigframes_sql_scalar/out.sql index 14853067c700..80b3137b0b55 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_bigframes_sql_scalar/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_bigframes_sql_scalar/out.sql @@ -1,4 +1,4 @@ SELECT `rowindex`, ROUND(`int64_col` + `int64_too`) AS `0` -FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_sql_scalar/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_sql_scalar/out.sql index 14853067c700..80b3137b0b55 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_sql_scalar/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_sql_scalar/out.sql @@ -1,4 +1,4 @@ SELECT `rowindex`, ROUND(`int64_col` + `int64_too`) AS `0` -FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_fromrange.py b/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_fromrange.py index ba2e2075517b..8c25ca0310cd 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_fromrange.py +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_fromrange.py @@ -32,4 +32,4 @@ def test_compile_fromrange(compiler_session, snapshot): sql, _, _ = df.resample(rule="7s")._block.to_sql_query( include_index=True, enable_cache=False ) - snapshot.assert_match(sql, "out.sql") + snapshot.assert_match(sql.strip() + "\n", "out.sql") diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/test_dataframe_accessor.py b/packages/bigframes/tests/unit/core/compile/sqlglot/test_dataframe_accessor.py index 26e4d1788059..e430f5664975 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/test_dataframe_accessor.py +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/test_dataframe_accessor.py @@ -22,6 +22,10 @@ pytest.importorskip("pytest_snapshot") +# Only test on the latest pandas since column naming behavior is slightly +# different across versions, e.g. unnamed vs 0 for unnamed Series. +pytest.importorskip("pandas", minversion="3.0.0") + def test_sql_scalar(scalar_types_df: bpd.DataFrame, snapshot, monkeypatch): session = mock.create_autospec(bigframes.session.Session) @@ -42,7 +46,7 @@ def to_pandas(series, *, ordered): ) session.read_pandas.assert_called_once() - snapshot.assert_match(result, "out.sql") + snapshot.assert_match(result.strip() + "\n", "out.sql") def test_bigframes_sql_scalar(scalar_types_df: bpd.DataFrame, snapshot): @@ -57,4 +61,4 @@ def test_bigframes_sql_scalar(scalar_types_df: bpd.DataFrame, snapshot): session.read_pandas.assert_not_called() # Bigframes implementation returns a bigframes.series.Series sql, _, _ = result.to_frame()._to_sql_query(include_index=True) - snapshot.assert_match(sql, "out.sql") + snapshot.assert_match(sql.strip() + "\n", "out.sql") diff --git a/packages/bigframes/tests/unit/extensions/core/test_dataframe_accessor.py b/packages/bigframes/tests/unit/extensions/core/test_dataframe_accessor.py index 914a448700f4..7ab4f5176980 100644 --- a/packages/bigframes/tests/unit/extensions/core/test_dataframe_accessor.py +++ b/packages/bigframes/tests/unit/extensions/core/test_dataframe_accessor.py @@ -96,16 +96,16 @@ def mock_generate(prompt, **kwargs): output_schema={"res": "STRING"}, ) - assert result == ( - df["text_input"], - { - "connection_id": "conn", - "endpoint": "endpoint", - "request_type": "dedicated", - "model_params": {"temp": 0.5}, - "output_schema": {"res": "STRING"}, - }, - ) + assert isinstance(result, tuple) + assert len(result) == 2 + pd.testing.assert_series_equal(result[0], df["text_input"]) + assert result[1] == { + "connection_id": "conn", + "endpoint": "endpoint", + "request_type": "dedicated", + "model_params": {"temp": 0.5}, + "output_schema": {"res": "STRING"}, + } def test_bigframes_ai_generate(scalar_types_df: bpd.DataFrame, monkeypatch): @@ -147,15 +147,15 @@ def mock_generate_bool(prompt, **kwargs): model_params={"temp": 0.5}, ) - assert result == ( - df["text_input"], - { - "connection_id": "conn", - "endpoint": "endpoint", - "request_type": "dedicated", - "model_params": {"temp": 0.5}, - }, - ) + assert isinstance(result, tuple) + assert len(result) == 2 + pd.testing.assert_series_equal(result[0], df["text_input"]) + assert result[1] == { + "connection_id": "conn", + "endpoint": "endpoint", + "request_type": "dedicated", + "model_params": {"temp": 0.5}, + } def test_bigframes_ai_generate_bool(scalar_types_df: bpd.DataFrame, monkeypatch): @@ -196,15 +196,15 @@ def mock_generate_int(prompt, **kwargs): model_params={"temp": 0.5}, ) - assert result == ( - df["text_input"], - { - "connection_id": "conn", - "endpoint": "endpoint", - "request_type": "dedicated", - "model_params": {"temp": 0.5}, - }, - ) + assert isinstance(result, tuple) + assert len(result) == 2 + pd.testing.assert_series_equal(result[0], df["text_input"]) + assert result[1] == { + "connection_id": "conn", + "endpoint": "endpoint", + "request_type": "dedicated", + "model_params": {"temp": 0.5}, + } def test_bigframes_ai_generate_int(scalar_types_df: bpd.DataFrame, monkeypatch): @@ -245,15 +245,15 @@ def mock_generate_double(prompt, **kwargs): model_params={"temp": 0.5}, ) - assert result == ( - df["text_input"], - { - "connection_id": "conn", - "endpoint": "endpoint", - "request_type": "dedicated", - "model_params": {"temp": 0.5}, - }, - ) + assert isinstance(result, tuple) + assert len(result) == 2 + pd.testing.assert_series_equal(result[0], df["text_input"]) + assert result[1] == { + "connection_id": "conn", + "endpoint": "endpoint", + "request_type": "dedicated", + "model_params": {"temp": 0.5}, + } def test_bigframes_ai_generate_double(scalar_types_df: bpd.DataFrame, monkeypatch): diff --git a/packages/bigframes/tests/unit/test_col.py b/packages/bigframes/tests/unit/test_col.py index 9f5bbca5d9bc..c8caf9136c0a 100644 --- a/packages/bigframes/tests/unit/test_col.py +++ b/packages/bigframes/tests/unit/test_col.py @@ -88,10 +88,10 @@ def scalars_dfs( def test_pd_col_unary_operators(scalars_dfs, op): scalars_df, scalars_pandas_df = scalars_dfs bf_kwargs = { - "result": op(bpd.col("float64_col")), + "result": op(bpd.col("bool_col")), } pd_kwargs = { - "result": op(pd.col("float64_col")), # type: ignore + "result": op(pd.col("bool_col")), # type: ignore } df = scalars_df.assign(**bf_kwargs) diff --git a/packages/bigframes/third_party/bigframes_vendored/pandas/core/frame.py b/packages/bigframes/third_party/bigframes_vendored/pandas/core/frame.py index 678fb5f65177..f016cab47ae3 100644 --- a/packages/bigframes/third_party/bigframes_vendored/pandas/core/frame.py +++ b/packages/bigframes/third_party/bigframes_vendored/pandas/core/frame.py @@ -66,7 +66,7 @@ def axes(self) -> list: >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> df.axes[1:] - [Index(['col1', 'col2'], dtype='object')] + [Index(['col1', 'col2'], dtype='str')] """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1963,7 +1963,7 @@ def keys(self): ... 'B': [4, 5, 6], ... }) >>> df.keys() - Index(['A', 'B'], dtype='object') + Index(['A', 'B'], dtype='str') Returns: pandas.Index: Info axis. @@ -4819,7 +4819,8 @@ def resample( >>> df = bpd.DataFrame(data).set_index("timestamp_col") >>> df.resample(rule="7s").min() - int64_col int64_too + int64_col int64_too + timestamp_col 2021-01-01 12:59:55 0 10 2021-01-01 13:00:02 2 12 2021-01-01 13:00:09 9 19 @@ -4832,7 +4833,8 @@ def resample( >>> df = bpd.DataFrame(data) >>> df.resample(rule="7s", on = "timestamp_col", origin="start").min() - int64_col int64_too + int64_col int64_too + timestamp_col 2021-01-01 13:00:00 0 10 2021-01-01 13:00:07 7 17 2021-01-01 13:00:14 14 24 @@ -6633,7 +6635,7 @@ def columns(self): [3 rows x 3 columns] >>> df.columns - Index(['Name', 'Age', 'Location'], dtype='object') + Index(['Name', 'Age', 'Location'], dtype='str') You can also set new labels for columns. @@ -6646,7 +6648,7 @@ def columns(self): [3 rows x 3 columns] >>> df.columns - Index(['NewName', 'NewAge', 'NewLocation'], dtype='object') + Index(['NewName', 'NewAge', 'NewLocation'], dtype='str') """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) diff --git a/packages/bigframes/third_party/bigframes_vendored/pandas/core/generic.py b/packages/bigframes/third_party/bigframes_vendored/pandas/core/generic.py index a5a3e6098376..0e4ac335c8a0 100644 --- a/packages/bigframes/third_party/bigframes_vendored/pandas/core/generic.py +++ b/packages/bigframes/third_party/bigframes_vendored/pandas/core/generic.py @@ -629,9 +629,9 @@ def dtypes(self): >>> df = bpd.DataFrame({'float': [1.0], 'int': [1], 'string': ['foo']}) >>> df.dtypes - float Float64 - int Int64 - string string[pyarrow] + float Float64 + int Int64 + string string dtype: object Returns: diff --git a/packages/bigframes/third_party/bigframes_vendored/pandas/core/indexes/accessor.py b/packages/bigframes/third_party/bigframes_vendored/pandas/core/indexes/accessor.py index a3404c222d49..da5f9e3b88a5 100644 --- a/packages/bigframes/third_party/bigframes_vendored/pandas/core/indexes/accessor.py +++ b/packages/bigframes/third_party/bigframes_vendored/pandas/core/indexes/accessor.py @@ -281,7 +281,7 @@ def month(self): **Examples:** >>> s = bpd.Series( - ... pd.date_range("2000-01-01", periods=3, freq="M") + ... pd.date_range("2000-01-01", periods=3, freq="ME") ... ) >>> s 0 2000-01-31 00:00:00 @@ -404,7 +404,7 @@ def year(self): **Examples:** >>> s = bpd.Series( - ... pd.date_range("2000-01-01", periods=3, freq="Y") + ... pd.date_range("2000-01-01", periods=3, freq="YE") ... ) >>> s 0 2000-12-31 00:00:00 diff --git a/packages/bigframes/third_party/bigframes_vendored/pandas/core/series.py b/packages/bigframes/third_party/bigframes_vendored/pandas/core/series.py index b9cacf3855a2..c116ed640122 100644 --- a/packages/bigframes/third_party/bigframes_vendored/pandas/core/series.py +++ b/packages/bigframes/third_party/bigframes_vendored/pandas/core/series.py @@ -2582,7 +2582,8 @@ def resample( ... } >>> s = bpd.DataFrame(data).set_index("timestamp_col") >>> s.resample(rule="7s", origin="epoch").min() - int64_col + int64_col + timestamp_col 2021-01-01 12:59:56 0 2021-01-01 13:00:03 3 2021-01-01 13:00:10 10 @@ -5674,8 +5675,8 @@ def iloc(self): With a scalar integer. - >>> type(df.iloc[0]) - + >>> type(df.iloc[0]) # doctest: +ELLIPSIS + >>> df.iloc[0] a 1 diff --git a/packages/bigframes/third_party/bigframes_vendored/sklearn/decomposition/_mf.py b/packages/bigframes/third_party/bigframes_vendored/sklearn/decomposition/_mf.py index 6d5a40714505..0ce79995d0c3 100644 --- a/packages/bigframes/third_party/bigframes_vendored/sklearn/decomposition/_mf.py +++ b/packages/bigframes/third_party/bigframes_vendored/sklearn/decomposition/_mf.py @@ -29,7 +29,7 @@ class MatrixFactorization(BaseEstimator, metaclass=ABCMeta): ... "value": [1, 1, 2, 1, 3, 1.2, 4, 1, 5, 0.8, 6, 1, 2, 3], ... }) >>> model = MatrixFactorization(feedback_type='explicit', num_factors=6, user_col='row', item_col='column', rating_col='value', l2_reg=2.06) - >>> W = model.fit(X) + >>> W = model.fit(X) # doctest: +SKIP Args: feedback_type ('explicit' | 'implicit'):