Skip to content

Commit 729f40b

Browse files
committed
resolve the comments
1 parent da9938c commit 729f40b

File tree

4 files changed

+75
-50
lines changed

4 files changed

+75
-50
lines changed

bigframes/dataframe.py

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
import bigframes.exceptions as bfe
7777
import bigframes.formatting_helpers as formatter
7878
import bigframes.functions
79+
from bigframes.functions import function_typing
7980
import bigframes.operations as ops
8081
import bigframes.operations.aggregations as agg_ops
8182
import bigframes.operations.ai
@@ -4815,11 +4816,11 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs):
48154816
else:
48164817
# This is a special case where we are providing not-pandas-like
48174818
# extension. If the bigquery function can take one or more
4818-
# params (exclude the args) then we assume that here the user
4819+
# params (excluding the args) then we assume that here the user's
48194820
# intention is to use the column values of the dataframe as
48204821
# arguments to the function. For this to work the following
48214822
# condition must be true:
4822-
# 1. The number or input params (exclude the args) in the
4823+
# 1. The number of input params (excluding the args) in the
48234824
# function must be same as the number of columns in the
48244825
# dataframe.
48254826
# 2. The dtypes of the columns in the dataframe must be
@@ -4829,23 +4830,35 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs):
48294830
udf_input_dtypes = func.udf_def.signature.bf_input_types
48304831
if len(udf_input_dtypes) != len(self.columns) + len(args):
48314832
raise ValueError(
4832-
f"Column count mismatch: BigFrames BigQuery function"
4833-
f" expected {len(udf_input_dtypes) - len(args)} columns"
4834-
f" from DataFrame but received {len(self.columns)}."
4833+
f"Parameter count mismatch: BigFrames BigQuery function"
4834+
f" (including the args) expected {len(udf_input_dtypes)}"
4835+
f" but received {len(self.columns) + len(args)}."
48354836
)
48364837
end_slice = -len(args) if args else None
48374838
if udf_input_dtypes[:end_slice] != tuple(self.dtypes.to_list()):
48384839
raise ValueError(
4839-
f"Data type mismatch: BigFrames BigQuery function takes"
4840-
f" arguments of types {udf_input_dtypes} but DataFrame"
4841-
f" dtypes are {tuple(self.dtypes)}."
4840+
f"Data type mismatch for DataFrame columns:"
4841+
f" Expected {udf_input_dtypes[:end_slice]}"
4842+
f" Received {tuple(self.dtypes)}."
48424843
)
4844+
if args:
4845+
bq_types = (
4846+
function_typing.sdk_type_from_python_type(type(arg))
4847+
for arg in args
4848+
)
4849+
args_dtype = tuple(
4850+
function_typing.sdk_type_to_bf_type(bq_type)
4851+
for bq_type in bq_types
4852+
)
4853+
if udf_input_dtypes[end_slice:] != args_dtype:
4854+
raise ValueError(
4855+
f"Data type mismatch for 'args' parameter:"
4856+
f" Expected {udf_input_dtypes[end_slice:]}"
4857+
f" Received {args_dtype}."
4858+
)
48434859

48444860
series_list = [self[col] for col in self.columns]
4845-
if args:
4846-
op_list = series_list[1:] + list(args)
4847-
else:
4848-
op_list = series_list[1:]
4861+
op_list = series_list[1:] + list(args)
48494862
result_series = series_list[0]._apply_nary_op(
48504863
ops.NaryRemoteFunctionOp(function_def=func.udf_def), op_list
48514864
)

bigframes/functions/_function_session.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -959,15 +959,16 @@ def _convert_row_processor_sig(
959959
) -> Optional[inspect.Signature]:
960960
import bigframes.series as bf_series
961961

962-
first_param = next(iter(signature.parameters.values()))
963-
param_type = first_param.annotation
964-
if (param_type == bf_series.Series) or (param_type == pandas.Series):
965-
msg = bfe.format_message("input_types=Series is in preview.")
966-
warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning)
967-
return signature.replace(
968-
parameters=[
969-
p.replace(annotation=str) if i == 0 else p
970-
for i, p in enumerate(signature.parameters.values())
971-
]
972-
)
962+
if len(signature.parameters) >= 1:
963+
first_param = next(iter(signature.parameters.values()))
964+
param_type = first_param.annotation
965+
if (param_type == bf_series.Series) or (param_type == pandas.Series):
966+
msg = bfe.format_message("input_types=Series is in preview.")
967+
warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning)
968+
return signature.replace(
969+
parameters=[
970+
p.replace(annotation=str) if i == 0 else p
971+
for i, p in enumerate(signature.parameters.values())
972+
]
973+
)
973974
return None

tests/system/large/functions/test_managed_function.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -467,21 +467,19 @@ def foo(x, y, z):
467467

468468
# Fails to apply on dataframe with incompatible number of columns.
469469
with pytest.raises(
470-
ValueError,
471-
match="^Column count mismatch: BigFrames BigQuery function expected 3 columns from DataFrame but received 2\\.$",
470+
ValueError, match="^Parameter count mismatch:.* expected 3 but received 2."
472471
):
473472
bf_df[["Id", "Age"]].apply(foo, axis=1)
474473

475474
with pytest.raises(
476-
ValueError,
477-
match="^Column count mismatch: BigFrames BigQuery function expected 3 columns from DataFrame but received 4\\.$",
475+
ValueError, match="^Parameter count mismatch:.* expected 3 but received 4."
478476
):
479477
bf_df.assign(Country="lalaland").apply(foo, axis=1)
480478

481479
# Fails to apply on dataframe with incompatible column datatypes.
482480
with pytest.raises(
483481
ValueError,
484-
match="^Data type mismatch: BigFrames BigQuery function takes arguments of types .* but DataFrame dtypes are .*",
482+
match="^Data type mismatch for DataFrame columns: Expected .* Received .*",
485483
):
486484
bf_df.assign(Age=bf_df["Age"].astype("Int64")).apply(foo, axis=1)
487485

@@ -985,20 +983,26 @@ def the_sum(s1, s2, x):
985983

986984
# Fails to apply on dataframe with incompatible number of columns.
987985
with pytest.raises(
988-
ValueError,
989-
match="^Column count mismatch: BigFrames BigQuery function expected 2 columns from DataFrame but received 3\\.$",
986+
ValueError, match="^Parameter count mismatch:.* expected 3 but received 4."
990987
):
991988
scalars_df[columns + ["float64_col"]].apply(the_sum_mf, axis=1, args=args1)
992989

993990
# Fails to apply on dataframe with incompatible column datatypes.
994991
with pytest.raises(
995992
ValueError,
996-
match="^Data type mismatch: BigFrames BigQuery function takes arguments of types .* but DataFrame dtypes are .*",
993+
match="^Data type mismatch for DataFrame columns: Expected .* Received .*",
997994
):
998995
scalars_df[columns].assign(
999996
int64_col=lambda df: df["int64_col"].astype("Float64")
1000997
).apply(the_sum_mf, axis=1, args=args1)
1001998

999+
# Fails to apply on dataframe with incompatible args datatypes.
1000+
with pytest.raises(
1001+
ValueError,
1002+
match="^Data type mismatch for 'args' parameter: Expected .* Received .*",
1003+
):
1004+
scalars_df[columns].apply(the_sum_mf, axis=1, args=(1.3,))
1005+
10021006
bf_result = (
10031007
scalars_df[columns]
10041008
.dropna()

tests/system/large/functions/test_remote_function.py

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1958,20 +1958,33 @@ def the_sum(s1, s2, x):
19581958

19591959
# Fails to apply on dataframe with incompatible number of columns.
19601960
with pytest.raises(
1961-
ValueError,
1962-
match="^Column count mismatch: BigFrames BigQuery function expected 2 columns from DataFrame but received 3\\.$",
1961+
ValueError, match="^Parameter count mismatch:.* expected 3 but received 4."
19631962
):
1964-
scalars_df[columns + ["float64_col"]].apply(the_sum_mf, axis=1, args=args1)
1963+
scalars_df[columns].apply(
1964+
the_sum_mf,
1965+
axis=1,
1966+
args=(
1967+
1,
1968+
1,
1969+
),
1970+
)
19651971

19661972
# Fails to apply on dataframe with incompatible column datatypes.
19671973
with pytest.raises(
19681974
ValueError,
1969-
match="^Data type mismatch: BigFrames BigQuery function takes arguments of types .* but DataFrame dtypes are .*",
1975+
match="^Data type mismatch for DataFrame columns: Expected .* Received .*",
19701976
):
19711977
scalars_df[columns].assign(
19721978
int64_col=lambda df: df["int64_col"].astype("Float64")
19731979
).apply(the_sum_mf, axis=1, args=args1)
19741980

1981+
# Fails to apply on dataframe with incompatible args datatypes.
1982+
with pytest.raises(
1983+
ValueError,
1984+
match="^Data type mismatch for 'args' parameter: Expected .* Received .*",
1985+
):
1986+
scalars_df[columns].apply(the_sum_mf, axis=1, args=("hello world",))
1987+
19751988
bf_result = (
19761989
scalars_df[columns]
19771990
.dropna()
@@ -2293,20 +2306,18 @@ def foo(x, y, z):
22932306

22942307
# Fails to apply on dataframe with incompatible number of columns
22952308
with pytest.raises(
2296-
ValueError,
2297-
match="^Column count mismatch: BigFrames BigQuery function expected 3 columns from DataFrame but received 2\\.$",
2309+
ValueError, match="^Parameter count mismatch:.* expected 3 but received 2."
22982310
):
22992311
bf_df[["Id", "Age"]].apply(foo, axis=1)
23002312
with pytest.raises(
2301-
ValueError,
2302-
match="^Column count mismatch: BigFrames BigQuery function expected 3 columns from DataFrame but received 4\\.$",
2313+
ValueError, match="^Parameter count mismatch:.* expected 3 but received 4."
23032314
):
23042315
bf_df.assign(Country="lalaland").apply(foo, axis=1)
23052316

23062317
# Fails to apply on dataframe with incompatible column datatypes
23072318
with pytest.raises(
23082319
ValueError,
2309-
match="^Data type mismatch: BigFrames BigQuery function takes arguments of types .* but DataFrame dtypes are .*",
2320+
match="^Data type mismatch for DataFrame columns: Expected .* Received .*",
23102321
):
23112322
bf_df.assign(Age=bf_df["Age"].astype("Int64")).apply(foo, axis=1)
23122323

@@ -2377,20 +2388,18 @@ def foo(x, y, z):
23772388

23782389
# Fails to apply on dataframe with incompatible number of columns
23792390
with pytest.raises(
2380-
ValueError,
2381-
match="^Column count mismatch: BigFrames BigQuery function expected 3 columns from DataFrame but received 2\\.$",
2391+
ValueError, match="^Parameter count mismatch:.* expected 3 but received 2."
23822392
):
23832393
bf_df[["Id", "Age"]].apply(foo, axis=1)
23842394
with pytest.raises(
2385-
ValueError,
2386-
match="^Column count mismatch: BigFrames BigQuery function expected 3 columns from DataFrame but received 4\\.$",
2395+
ValueError, match="^Parameter count mismatch:.* expected 3 but received 4."
23872396
):
23882397
bf_df.assign(Country="lalaland").apply(foo, axis=1)
23892398

23902399
# Fails to apply on dataframe with incompatible column datatypes
23912400
with pytest.raises(
23922401
ValueError,
2393-
match="^Data type mismatch: BigFrames BigQuery function takes arguments of types .* but DataFrame dtypes are .*",
2402+
match="^Data type mismatch for DataFrame columns: Expected .* Received .*",
23942403
):
23952404
bf_df.assign(Age=bf_df["Age"].astype("Int64")).apply(foo, axis=1)
23962405

@@ -2451,20 +2460,18 @@ def foo(x):
24512460

24522461
# Fails to apply on dataframe with incompatible number of columns
24532462
with pytest.raises(
2454-
ValueError,
2455-
match="^Column count mismatch: BigFrames BigQuery function expected 1 columns from DataFrame but received 0\\.$",
2463+
ValueError, match="^Parameter count mismatch:.* expected 1 but received 0."
24562464
):
24572465
bf_df[[]].apply(foo, axis=1)
24582466
with pytest.raises(
2459-
ValueError,
2460-
match="^Column count mismatch: BigFrames BigQuery function expected 1 columns from DataFrame but received 2\\.$",
2467+
ValueError, match="^Parameter count mismatch:.* expected 1 but received 2."
24612468
):
24622469
bf_df.assign(Country="lalaland").apply(foo, axis=1)
24632470

24642471
# Fails to apply on dataframe with incompatible column datatypes
24652472
with pytest.raises(
24662473
ValueError,
2467-
match="^Data type mismatch: BigFrames BigQuery function takes arguments of types .* but DataFrame dtypes are .*",
2474+
match="^Data type mismatch for DataFrame columns: Expected .* Received .*",
24682475
):
24692476
bf_df.assign(Id=bf_df["Id"].astype("Float64")).apply(foo, axis=1)
24702477

0 commit comments

Comments
 (0)