Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions packages/bigframes/bigframes/ml/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,7 @@ def __init__(
warnings.warn(msg, category=exceptions.PreviewWarning)

if model_name is None:
model_name = "gemini-2.0-flash-001"
model_name = "gemini-2.5-pro"
msg = exceptions.format_message(_REMOVE_DEFAULT_MODEL_WARNING)
warnings.warn(msg, category=FutureWarning, stacklevel=2)

Expand All @@ -522,15 +522,21 @@ def _create_bqml_model(self):
)
)
warnings.warn(msg)
if self.model_name.startswith("gemini-1.5"):
if self.model_name.startswith(
("gemini-2.0", "gemini-1.5")
) or self.model_name in (
"gemini-2.5-flash",
"gemini-2.5-flash-lite",
"gemini-2.5-pro-preview-05-06",
):
msg = exceptions.format_message(
_MODEL_DEPRECATE_WARNING.format(
model_name=self.model_name,
new_model_name="gemini-2.5-X",
new_model_name="gemini-2.5-pro",
link="https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator",
)
)
warnings.warn(msg)
warnings.warn(msg, category=exceptions.ApiDeprecationWarning)

options = {"endpoint": self.model_name}

Expand Down
10 changes: 6 additions & 4 deletions packages/bigframes/bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2258,11 +2258,13 @@ def mask(self, cond, other=None) -> Series:
return self.where(~cond, other)

def to_frame(self, name: blocks.Label = None) -> bigframes.dataframe.DataFrame:
provided_name = name if name else self.name
provided_name = name if name is not None else self.name
# To be consistent with Pandas, it assigns 0 as the column name if missing. 0 is the first element of RangeIndex.
block = self._block.with_column_labels(
[provided_name] if provided_name else [0]
)
if provided_name is None or pandas.isna(provided_name):
column_names = [0]
else:
column_names = [provided_name]
block = self._block.with_column_labels(column_names)
return bigframes.dataframe.DataFrame(block)

def to_csv(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def text_model(bq_connection, dataset_id):
model_name = f"{dataset_id}.text_model"
return ml.create_model(
model_name=model_name,
options={"endpoint": "gemini-2.5-flash"},
options={"endpoint": "gemini-2.5-pro"},
connection_name=bq_connection,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def gemini_flash_model(session, bq_connection) -> llm.GeminiTextGenerator:
return llm.GeminiTextGenerator(
session=session,
connection_name=bq_connection,
model_name="gemini-2.5-flash",
model_name="gemini-2.5-pro",
)


Expand Down
18 changes: 9 additions & 9 deletions packages/bigframes/tests/system/small/bigquery/test_ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def test_ai_function_pandas_input(session):
s2 = bpd.Series(["fruit", "tree"], session=session)
prompt = (s1, " is a ", s2)

result = bbq.ai.generate_bool(prompt, endpoint="gemini-2.5-flash")
result = bbq.ai.generate_bool(prompt, endpoint="gemini-2.5-pro")

assert _contains_no_nulls(result)
assert result.dtype == pd.ArrowDtype(
Expand All @@ -81,7 +81,7 @@ def test_ai_function_string_input(session):
mock_get_session.return_value = session
prompt = "Is apple a fruit?"

result = bbq.ai.generate_bool(prompt, endpoint="gemini-2.5-flash")
result = bbq.ai.generate_bool(prompt, endpoint="gemini-2.5-pro")

assert _contains_no_nulls(result)
assert result.dtype == pd.ArrowDtype(
Expand All @@ -102,7 +102,7 @@ def test_ai_function_compile_model_params(session):
model_params = {"generation_config": {"thinking_config": {"thinking_budget": 0}}}

result = bbq.ai.generate_bool(
prompt, endpoint="gemini-2.5-flash", model_params=model_params
prompt, endpoint="gemini-2.5-pro", model_params=model_params
)

assert _contains_no_nulls(result)
Expand All @@ -121,7 +121,7 @@ def test_ai_generate(session):
country = bpd.Series(["Japan", "Canada"], session=session)
prompt = ("What's the capital city of ", country, "? one word only")

result = bbq.ai.generate(prompt, endpoint="gemini-2.5-flash")
result = bbq.ai.generate(prompt, endpoint="gemini-2.5-pro")

assert _contains_no_nulls(result)
assert result.dtype == pd.ArrowDtype(
Expand All @@ -141,7 +141,7 @@ def test_ai_generate_with_output_schema(session):

result = bbq.ai.generate(
prompt,
endpoint="gemini-2.5-flash",
endpoint="gemini-2.5-pro",
output_schema={"population": "INT64", "is_in_north_america": "bool"},
)

Expand All @@ -165,7 +165,7 @@ def test_ai_generate_with_invalid_output_schema_raise_error(session):
with pytest.raises(ValueError):
bbq.ai.generate(
prompt,
endpoint="gemini-2.5-flash",
endpoint="gemini-2.5-pro",
output_schema={"population": "INT64", "is_in_north_america": "JSON"},
)

Expand All @@ -175,7 +175,7 @@ def test_ai_generate_bool(session):
s2 = bpd.Series(["fruit", "tree"], session=session)
prompt = (s1, " is a ", s2)

result = bbq.ai.generate_bool(prompt, endpoint="gemini-2.5-flash")
result = bbq.ai.generate_bool(prompt, endpoint="gemini-2.5-pro")

assert _contains_no_nulls(result)
assert result.dtype == pd.ArrowDtype(
Expand Down Expand Up @@ -216,7 +216,7 @@ def test_ai_generate_int(session):
s = bpd.Series(["Cat"], session=session)
prompt = ("How many legs does a ", s, " have?")

result = bbq.ai.generate_int(prompt, endpoint="gemini-2.5-flash")
result = bbq.ai.generate_int(prompt, endpoint="gemini-2.5-pro")

assert _contains_no_nulls(result)
assert result.dtype == pd.ArrowDtype(
Expand Down Expand Up @@ -259,7 +259,7 @@ def test_ai_generate_double(session):
s = bpd.Series(["Cat"], session=session)
prompt = ("How many legs does a ", s, " have?")

result = bbq.ai.generate_double(prompt, endpoint="gemini-2.5-flash")
result = bbq.ai.generate_double(prompt, endpoint="gemini-2.5-pro")

assert _contains_no_nulls(result)
assert result.dtype == pd.ArrowDtype(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ SELECT
input => STRUCT(`string_col`),
categories => ['greeting', 'rejection'],
examples => [('hi', 'greeting'), ('bye', 'rejection')],
endpoint => 'gemini-2.5-flash',
endpoint => 'gemini-2.5-pro',
max_error_ratio => 0.1
) AS `result`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
SELECT
AI.GENERATE(
prompt => STRUCT(`string_col`, ' is the same as ', `string_col`),
endpoint => 'gemini-2.5-flash',
endpoint => 'gemini-2.5-pro',
request_type => 'SHARED'
) AS `result`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
SELECT
AI.GENERATE_BOOL(
prompt => STRUCT(`string_col`, ' is the same as ', `string_col`),
endpoint => 'gemini-2.5-flash'
endpoint => 'gemini-2.5-pro'
) AS `result`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ SELECT
AI.GENERATE_BOOL(
prompt => STRUCT(`string_col`, ' is the same as ', `string_col`),
connection_id => 'bigframes-dev.us.bigframes-default-connection',
endpoint => 'gemini-2.5-flash'
endpoint => 'gemini-2.5-pro'
) AS `result`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
SELECT
AI.GENERATE_DOUBLE(
prompt => STRUCT(`string_col`, ' is the same as ', `string_col`),
endpoint => 'gemini-2.5-flash'
endpoint => 'gemini-2.5-pro'
) AS `result`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ SELECT
AI.GENERATE_DOUBLE(
prompt => STRUCT(`string_col`, ' is the same as ', `string_col`),
connection_id => 'bigframes-dev.us.bigframes-default-connection',
endpoint => 'gemini-2.5-flash'
endpoint => 'gemini-2.5-pro'
) AS `result`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
SELECT
AI.GENERATE_INT(
prompt => STRUCT(`string_col`, ' is the same as ', `string_col`),
endpoint => 'gemini-2.5-flash'
endpoint => 'gemini-2.5-pro'
) AS `result`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ SELECT
AI.GENERATE_INT(
prompt => STRUCT(`string_col`, ' is the same as ', `string_col`),
connection_id => 'bigframes-dev.us.bigframes-default-connection',
endpoint => 'gemini-2.5-flash'
endpoint => 'gemini-2.5-pro'
) AS `result`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ SELECT
AI.GENERATE(
prompt => STRUCT(`string_col`, ' is the same as ', `string_col`),
connection_id => 'bigframes-dev.us.bigframes-default-connection',
endpoint => 'gemini-2.5-flash'
endpoint => 'gemini-2.5-pro'
) AS `result`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
SELECT
AI.GENERATE(
prompt => STRUCT(`string_col`, ' is the same as ', `string_col`),
endpoint => 'gemini-2.5-flash',
endpoint => 'gemini-2.5-pro',
output_schema => 'x INT64, y FLOAT64'
) AS `result`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
SELECT
AI.IF(
prompt => STRUCT(`string_col`, ' is the same as ', `string_col`),
endpoint => 'gemini-2.5-flash'
endpoint => 'gemini-2.5-pro'
) AS `result`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
SELECT
AI.SCORE(
prompt => STRUCT(`string_col`, ' is the same as ', `string_col`),
endpoint => 'gemini-2.5-flash',
endpoint => 'gemini-2.5-pro',
max_error_ratio => 0.5
) AS `result`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_ai_generate(scalar_types_df: dataframe.DataFrame, snapshot):

op = ops.AIGenerate(
prompt_context=(None, " is the same as ", None),
endpoint="gemini-2.5-flash",
endpoint="gemini-2.5-pro",
request_type="SHARED",
)

Expand All @@ -47,7 +47,7 @@ def test_ai_generate_with_connection_id(scalar_types_df: dataframe.DataFrame, sn
op = ops.AIGenerate(
prompt_context=(None, " is the same as ", None),
connection_id=CONNECTION_ID,
endpoint="gemini-2.5-flash",
endpoint="gemini-2.5-pro",
)

sql = utils._apply_ops_to_sql(
Expand All @@ -63,7 +63,7 @@ def test_ai_generate_with_output_schema(scalar_types_df: dataframe.DataFrame, sn
op = ops.AIGenerate(
prompt_context=(None, " is the same as ", None),
connection_id=None,
endpoint="gemini-2.5-flash",
endpoint="gemini-2.5-pro",
output_schema="x INT64, y FLOAT64",
)

Expand Down Expand Up @@ -94,7 +94,7 @@ def test_ai_generate_bool(scalar_types_df: dataframe.DataFrame, snapshot):

op = ops.AIGenerateBool(
prompt_context=(None, " is the same as ", None),
endpoint="gemini-2.5-flash",
endpoint="gemini-2.5-pro",
)

sql = utils._apply_ops_to_sql(
Expand All @@ -112,7 +112,7 @@ def test_ai_generate_bool_with_connection_id(
op = ops.AIGenerateBool(
prompt_context=(None, " is the same as ", None),
connection_id=CONNECTION_ID,
endpoint="gemini-2.5-flash",
endpoint="gemini-2.5-pro",
)

sql = utils._apply_ops_to_sql(
Expand Down Expand Up @@ -145,7 +145,7 @@ def test_ai_generate_int(scalar_types_df: dataframe.DataFrame, snapshot):
op = ops.AIGenerateInt(
# The prompt does not make semantic sense but we only care about syntax correctness.
prompt_context=(None, " is the same as ", None),
endpoint="gemini-2.5-flash",
endpoint="gemini-2.5-pro",
)

sql = utils._apply_ops_to_sql(
Expand All @@ -164,7 +164,7 @@ def test_ai_generate_int_with_connection_id(
# The prompt does not make semantic sense but we only care about syntax correctness.
prompt_context=(None, " is the same as ", None),
connection_id=CONNECTION_ID,
endpoint="gemini-2.5-flash",
endpoint="gemini-2.5-pro",
)

sql = utils._apply_ops_to_sql(
Expand Down Expand Up @@ -198,7 +198,7 @@ def test_ai_generate_double(scalar_types_df: dataframe.DataFrame, snapshot):
op = ops.AIGenerateDouble(
# The prompt does not make semantic sense but we only care about syntax correctness.
prompt_context=(None, " is the same as ", None),
endpoint="gemini-2.5-flash",
endpoint="gemini-2.5-pro",
)

sql = utils._apply_ops_to_sql(
Expand All @@ -217,7 +217,7 @@ def test_ai_generate_double_with_connection_id(
# The prompt does not make semantic sense but we only care about syntax correctness.
prompt_context=(None, " is the same as ", None),
connection_id=CONNECTION_ID,
endpoint="gemini-2.5-flash",
endpoint="gemini-2.5-pro",
)

sql = utils._apply_ops_to_sql(
Expand Down Expand Up @@ -322,7 +322,7 @@ def test_ai_if_with_endpoint(scalar_types_df: dataframe.DataFrame, snapshot):

op = ops.AIIf(
prompt_context=(None, " is the same as ", None),
endpoint="gemini-2.5-flash",
endpoint="gemini-2.5-pro",
)

sql = utils._apply_ops_to_sql(
Expand Down Expand Up @@ -354,7 +354,7 @@ def test_ai_classify_with_params(scalar_types_df: dataframe.DataFrame, snapshot)
prompt_context=(None,),
categories=("greeting", "rejection"),
examples=(("hi", "greeting"), ("bye", "rejection")),
endpoint="gemini-2.5-flash",
endpoint="gemini-2.5-pro",
max_error_ratio=0.1,
)

Expand Down Expand Up @@ -421,7 +421,7 @@ def test_ai_score_with_endpoint_and_max_error_ratio(

op = ops.AIScore(
prompt_context=(None, " is the same as ", None),
endpoint="gemini-2.5-flash",
endpoint="gemini-2.5-pro",
max_error_ratio=0.5,
)

Expand Down
Loading
Loading