Skip to content

Commit a96687e

Browse files
authored
Merge branch 'main' into dbt-sample
2 parents 1c3d5f6 + 25ab0d5 commit a96687e

File tree

16 files changed

+477
-9
lines changed

16 files changed

+477
-9
lines changed

bigframes/core/compile/scalar_op_compiler.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -487,9 +487,9 @@ def isalpha_op_impl(x: ibis_types.Value):
487487

488488
@scalar_op_compiler.register_unary_op(ops.isdigit_op)
489489
def isdigit_op_impl(x: ibis_types.Value):
490-
# Based on docs, should include superscript/subscript-ed numbers
491-
# Tests however pass only when set to Nd unicode class
492-
return typing.cast(ibis_types.StringValue, x).re_search(r"^(\p{Nd})+$")
490+
return typing.cast(ibis_types.StringValue, x).re_search(
491+
r"^[\p{Nd}\x{00B9}\x{00B2}\x{00B3}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}]+$"
492+
)
493493

494494

495495
@scalar_op_compiler.register_unary_op(ops.isdecimal_op)

bigframes/core/compile/sqlglot/expressions/unary_compiler.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,52 @@
2323
from bigframes.core.compile.sqlglot.expressions.op_registration import OpRegistration
2424
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
2525

26+
_NAN = sge.Cast(this=sge.convert("NaN"), to="FLOAT64")
27+
_INF = sge.Cast(this=sge.convert("Infinity"), to="FLOAT64")
28+
29+
# Approx Highest number you can pass in to EXP function and get a valid FLOAT64 result
30+
# FLOAT64 has 11 exponent bits, so max value is about 2**(2**10)
31+
# ln(2**(2**10)) == (2**10)*ln(2) ~= 709.78, so EXP(x) for x>709.78 will overflow.
32+
_FLOAT64_EXP_BOUND = sge.convert(709.78)
33+
2634
UNARY_OP_REGISTRATION = OpRegistration()
2735

2836

2937
def compile(op: ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
3038
return UNARY_OP_REGISTRATION[op](op, expr)
3139

3240

41+
@UNARY_OP_REGISTRATION.register(ops.arccos_op)
42+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
43+
return sge.Case(
44+
ifs=[
45+
sge.If(
46+
this=sge.func("ABS", expr.expr) > sge.convert(1),
47+
true=_NAN,
48+
)
49+
],
50+
default=sge.func("ACOS", expr.expr),
51+
)
52+
53+
54+
@UNARY_OP_REGISTRATION.register(ops.arcsin_op)
55+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
56+
return sge.Case(
57+
ifs=[
58+
sge.If(
59+
this=sge.func("ABS", expr.expr) > sge.convert(1),
60+
true=_NAN,
61+
)
62+
],
63+
default=sge.func("ASIN", expr.expr),
64+
)
65+
66+
67+
@UNARY_OP_REGISTRATION.register(ops.arctan_op)
68+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
69+
return sge.func("ATAN", expr.expr)
70+
71+
3372
@UNARY_OP_REGISTRATION.register(ops.ArrayToStringOp)
3473
def _(op: ops.ArrayToStringOp, expr: TypedExpr) -> sge.Expression:
3574
return sge.ArrayToString(this=expr.expr, expression=f"'{op.delimiter}'")
@@ -72,6 +111,49 @@ def _(op: ops.ArraySliceOp, expr: TypedExpr) -> sge.Expression:
72111
return sge.array(selected_elements)
73112

74113

114+
@UNARY_OP_REGISTRATION.register(ops.cos_op)
115+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
116+
return sge.func("COS", expr.expr)
117+
118+
119+
@UNARY_OP_REGISTRATION.register(ops.hash_op)
120+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
121+
return sge.func("FARM_FINGERPRINT", expr.expr)
122+
123+
124+
@UNARY_OP_REGISTRATION.register(ops.isnull_op)
125+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
126+
return sge.Is(this=expr.expr, expression=sge.Null())
127+
128+
129+
@UNARY_OP_REGISTRATION.register(ops.notnull_op)
130+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
131+
return sge.Not(this=sge.Is(this=expr.expr, expression=sge.Null()))
132+
133+
134+
@UNARY_OP_REGISTRATION.register(ops.sin_op)
135+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
136+
return sge.func("SIN", expr.expr)
137+
138+
139+
@UNARY_OP_REGISTRATION.register(ops.sinh_op)
140+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
141+
return sge.Case(
142+
ifs=[
143+
sge.If(
144+
this=sge.func("ABS", expr.expr) > _FLOAT64_EXP_BOUND,
145+
true=sge.func("SIGN", expr.expr) * _INF,
146+
)
147+
],
148+
default=sge.func("SINH", expr.expr),
149+
)
150+
151+
152+
@UNARY_OP_REGISTRATION.register(ops.tan_op)
153+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
154+
return sge.func("TAN", expr.expr)
155+
156+
75157
# JSON Ops
76158
@UNARY_OP_REGISTRATION.register(ops.JSONExtract)
77159
def _(op: ops.JSONExtract, expr: TypedExpr) -> sge.Expression:
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def test_sessions_and_io(project_id: str, dataset_id: str) -> None:
17+
YOUR_PROJECT_ID = project_id
18+
YOUR_LOCATION = "us"
19+
20+
# [START bigquery_dataframes_create_and_use_session_instance]
21+
import bigframes
22+
import bigframes.pandas as bpd
23+
24+
# Create session object
25+
context = bigframes.BigQueryOptions(
26+
project=YOUR_PROJECT_ID,
27+
location=YOUR_LOCATION,
28+
)
29+
session = bigframes.Session(context)
30+
31+
# Load a BigQuery table into a dataframe
32+
df1 = session.read_gbq("bigquery-public-data.ml_datasets.penguins")
33+
34+
# Create a dataframe with local data:
35+
df2 = bpd.DataFrame({"my_col": [1, 2, 3]}, session=session)
36+
# [END bigquery_dataframes_create_and_use_session_instance]
37+
assert df1 is not None
38+
assert df2 is not None
39+
40+
# [START bigquery_dataframes_combine_data_from_multiple_sessions_raise_error]
41+
import bigframes
42+
import bigframes.pandas as bpd
43+
44+
context = bigframes.BigQueryOptions(location=YOUR_LOCATION, project=YOUR_PROJECT_ID)
45+
46+
session1 = bigframes.Session(context)
47+
session2 = bigframes.Session(context)
48+
49+
series1 = bpd.Series([1, 2, 3, 4, 5], session=session1)
50+
series2 = bpd.Series([1, 2, 3, 4, 5], session=session2)
51+
52+
try:
53+
series1 + series2
54+
except ValueError as e:
55+
print(e)  # Error message: Cannot combine sources from multiple sessions
56+
# [END bigquery_dataframes_combine_data_from_multiple_sessions_raise_error]
57+
58+
# [START bigquery_dataframes_set_options_for_global_session]
59+
import bigframes.pandas as bpd
60+
61+
# Set project ID for the global session
62+
bpd.options.bigquery.project = YOUR_PROJECT_ID
63+
# Update the global default session location
64+
bpd.options.bigquery.location = YOUR_LOCATION
65+
# [END bigquery_dataframes_set_options_for_global_session]
66+
67+
# [START bigquery_dataframes_global_session_is_the_default_session]
68+
# The following two statements are essentially the same
69+
df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins")
70+
df = bpd.get_global_session().read_gbq("bigquery-public-data.ml_datasets.penguins")
71+
# [END bigquery_dataframes_global_session_is_the_default_session]
72+
assert df is not None
73+
74+
# [START bigquery_dataframes_create_dataframe_from_py_and_np]
75+
import numpy as np
76+
77+
import bigframes.pandas as bpd
78+
79+
s = bpd.Series([1, 2, 3])
80+
81+
# Create a dataframe with Python dict
82+
df = bpd.DataFrame(
83+
{
84+
"col_1": [1, 2, 3],
85+
"col_2": [4, 5, 6],
86+
}
87+
)
88+
89+
# Create a series with Numpy
90+
s = bpd.Series(np.arange(10))
91+
# [END bigquery_dataframes_create_dataframe_from_py_and_np]
92+
assert s is not None
93+
94+
# [START bigquery_dataframes_create_dataframe_from_pandas]
95+
import numpy as np
96+
import pandas as pd
97+
98+
import bigframes.pandas as bpd
99+
100+
pd_df = pd.DataFrame(np.random.randn(4, 2))
101+
102+
# Convert Pandas dataframe to BigQuery DataFrame with read_pandas()
103+
df_1 = bpd.read_pandas(pd_df)
104+
# Convert Pandas dataframe to BigQuery DataFrame with the dataframe constructor
105+
df_2 = bpd.DataFrame(pd_df)
106+
# [END bigquery_dataframes_create_dataframe_from_pandas]
107+
assert df_1 is not None
108+
assert df_2 is not None
109+
110+
# [START bigquery_dataframes_convert_bq_dataframe_to_pandas]
111+
import bigframes.pandas as bpd
112+
113+
bf_df = bpd.DataFrame({"my_col": [1, 2, 3]})
114+
# Returns a Pandas Dataframe
115+
bf_df.to_pandas()
116+
117+
bf_s = bpd.Series([1, 2, 3])
118+
# Returns a Pandas Series
119+
bf_s.to_pandas()
120+
# [END bigquery_dataframes_convert_bq_dataframe_to_pandas]
121+
assert bf_s.to_pandas() is not None
122+
123+
# [START bigquery_dataframes_to_pandas_dry_run]
124+
import bigframes.pandas as bpd
125+
126+
df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins")
127+
128+
# Returns a Pandas series with dry run stats
129+
df.to_pandas(dry_run=True)
130+
# [END bigquery_dataframes_to_pandas_dry_run]
131+
assert df.to_pandas(dry_run=True) is not None
132+
133+
# [START bigquery_dataframes_read_data_from_csv]
134+
import bigframes.pandas as bpd
135+
136+
# Read a CSV file from GCS
137+
df = bpd.read_csv("gs://cloud-samples-data/bigquery/us-states/us-states.csv")
138+
# [END bigquery_dataframes_read_data_from_csv]
139+
assert df is not None
140+
141+
# [START bigquery_dataframes_read_data_from_bigquery_table]
142+
import bigframes.pandas as bpd
143+
144+
df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins")
145+
# [END bigquery_dataframes_read_data_from_bigquery_table]
146+
assert df is not None
147+
148+
# [START bigquery_dataframes_read_from_sql_query]
149+
import bigframes.pandas as bpd
150+
151+
sql = """
152+
SELECT species, island, body_mass_g
153+
FROM bigquery-public-data.ml_datasets.penguins
154+
WHERE sex = 'MALE'
155+
"""
156+
157+
df = bpd.read_gbq(sql)
158+
# [END bigquery_dataframes_read_from_sql_query]
159+
assert df is not None
160+
161+
table_name = "snippets-session-and-io-test"
162+
163+
# [START bigquery_dataframes_dataframe_to_bigquery_table]
164+
import bigframes.pandas as bpd
165+
166+
df = bpd.DataFrame({"my_col": [1, 2, 3]})
167+
168+
df.to_gbq(f"{project_id}.{dataset_id}.{table_name}")
169+
# [END bigquery_dataframes_dataframe_to_bigquery_table]

tests/system/small/operations/test_strings.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -324,13 +324,10 @@ def test_isalpha(weird_strings, weird_strings_pd):
324324
)
325325

326326

327-
@pytest.mark.skipif(
328-
"dev" in pa.__version__,
329-
# b/333484335 pyarrow is inconsistent on the behavior
330-
reason="pyarrow dev version is inconsistent on isdigit behavior.",
331-
)
332327
def test_isdigit(weird_strings, weird_strings_pd):
333-
pd_result = weird_strings_pd.str.isdigit()
328+
# check the behavior against normal pandas str, since pyarrow has a bug with superscripts/fractions b/333484335
329+
# astype object instead of str to support pd.NA
330+
pd_result = weird_strings_pd.astype(object).str.isdigit()
334331
bf_result = weird_strings.str.isdigit().to_pandas()
335332

336333
pd.testing.assert_series_equal(
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`float64_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
CASE WHEN ABS(`bfcol_0`) > 1 THEN CAST('NaN' AS FLOAT64) ELSE ACOS(`bfcol_0`) END AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `float64_col`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`float64_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
CASE WHEN ABS(`bfcol_0`) > 1 THEN CAST('NaN' AS FLOAT64) ELSE ASIN(`bfcol_0`) END AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `float64_col`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`float64_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
ATAN(`bfcol_0`) AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `float64_col`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`float64_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
COS(`bfcol_0`) AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `float64_col`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`string_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
FARM_FINGERPRINT(`bfcol_0`) AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `string_col`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`float64_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
`bfcol_0` IS NULL AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `float64_col`
13+
FROM `bfcte_1`

0 commit comments

Comments
 (0)