|
13 | 13 | # limitations under the License. |
14 | 14 |
|
15 | 15 | import pandas.testing |
| 16 | +import pyarrow |
16 | 17 |
|
17 | 18 | from bigframes import dtypes |
18 | 19 |
|
@@ -53,6 +54,76 @@ def test_type_system_examples() -> None: |
53 | 54 | check_index_type=False, |
54 | 55 | ) |
55 | 56 |
|
| 57 | + # [START bigquery_dataframes_type_system_simple_json] |
| 58 | + import pandas as pd |
| 59 | + |
| 60 | + import bigframes.pandas as bpd |
| 61 | + |
| 62 | + json_data = [ |
| 63 | + "1", |
| 64 | + '"str"', |
| 65 | + "false", |
| 66 | + '["a",{"b":1},null]', |
| 67 | + '{"a":{"b":[1,2,3],"c":true}}', |
| 68 | + None, |
| 69 | + ] |
| 70 | + bpd.Series(json_data, dtype="json") |
| 71 | + # 0 1 |
| 72 | + # 1 "str" |
| 73 | + # 2 false |
| 74 | + # 3 ["a",{"b":1},null] |
| 75 | + # 4 {"a":{"b":[1,2,3],"c":true}} |
| 76 | + # 5 <NA> |
| 77 | + # [END bigquery_dataframes_type_system_simple_json] |
| 78 | + pandas.testing.assert_series_equal( |
| 79 | + bpd.Series(json_data, dtype=dtypes.JSON_DTYPE).to_pandas(), |
| 80 | + pd.Series(json_data, dtype=dtypes.JSON_DTYPE), |
| 81 | + check_index_type=False, |
| 82 | + ) |
| 83 | + |
| 84 | + assert pyarrow.__version__.startswith("19.") or pyarrow.__version__.startswith("2") |
| 85 | + if hasattr(pyarrow, "JsonType"): |
| 86 | + # [START bigquery_dataframes_type_system_mixed_json] |
| 87 | + import db_dtypes |
| 88 | + import pandas as pd |
| 89 | + import pyarrow as pa |
| 90 | + |
| 91 | + import bigframes.pandas as bpd |
| 92 | + |
| 93 | + list_data = [ |
| 94 | + [{"key": "1"}], |
| 95 | + [{"key": None}], |
| 96 | + [{"key": '["1","3","5"]'}], |
| 97 | + [{"key": '{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}'}], |
| 98 | + ] |
| 99 | + pa_array = pa.array(list_data, type=pa.list_(pa.struct([("key", pa.string())]))) |
| 100 | + bpd.Series( |
| 101 | + pd.arrays.ArrowExtensionArray(pa_array), |
| 102 | + dtype=pd.ArrowDtype( |
| 103 | + pa.list_(pa.struct([("key", pa.json_(pa.string()))])), |
| 104 | + ), |
| 105 | + ) |
| 106 | + # 0 [{'key': '1'}] |
| 107 | + # 1 [{'key': None}] |
| 108 | + # 2 [{'key': '["1","3","5"]'}] |
| 109 | + # 3 [{'key': '{"a":1,"b":["x","y"],"c":{"x":[],"z"... |
| 110 | + # [END bigquery_dataframes_type_system_mixed_json] |
| 111 | + pandas.testing.assert_series_equal( |
| 112 | + bpd.Series( |
| 113 | + pd.arrays.ArrowExtensionArray(pa_array), |
| 114 | + dtype=pd.ArrowDtype( |
| 115 | + pa.list_(pa.struct([("key", pa.json_(pa.string()))])), |
| 116 | + ), |
| 117 | + ).to_pandas(), |
| 118 | + pd.Series( |
| 119 | + pd.arrays.ArrowExtensionArray(pa_array), |
| 120 | + dtype=pd.ArrowDtype( |
| 121 | + pa.list_(pa.struct([("key", db_dtypes.JSONArrowType())])), |
| 122 | + ), |
| 123 | + ), |
| 124 | + check_index_type=False, |
| 125 | + ) |
| 126 | + |
56 | 127 | # [START bigquery_dataframes_type_system_load_timedelta] |
57 | 128 | import pandas as pd |
58 | 129 |
|
@@ -233,3 +304,46 @@ def test_type_system_examples() -> None: |
233 | 304 | pd.Series([0.0, 1.0], dtype=dtypes.FLOAT_DTYPE), |
234 | 305 | check_index_type=False, |
235 | 306 | ) |
| 307 | + |
| 308 | + # [START bigquery_dataframes_type_system_json_query] |
| 309 | + import pandas as pd |
| 310 | + |
| 311 | + import bigframes.bigquery as bbq |
| 312 | + import bigframes.pandas as bpd |
| 313 | + |
| 314 | + fruits = [ |
| 315 | + '{"fruits": [{"name": "apple"}, {"name": "cherry"}]}', |
| 316 | + '{"fruits": [{"name": "guava"}, {"name": "grapes"}]}', |
| 317 | + ] |
| 318 | + |
| 319 | + json_s = bpd.Series(fruits, dtype="json") |
| 320 | + bbq.json_query(json_s, "$.fruits[0]") |
| 321 | + # 0 {"name":"apple"} |
| 322 | + # 1 {"name":"guava"} |
| 323 | + # [END bigquery_dataframes_type_system_json_query] |
| 324 | + pandas.testing.assert_series_equal( |
| 325 | + bbq.json_query(json_s, "$.fruits[0]").to_pandas(), |
| 326 | + pd.Series(['{"name":"apple"}', '{"name":"guava"}'], dtype=dtypes.JSON_DTYPE), |
| 327 | + check_index_type=False, |
| 328 | + ) |
| 329 | + |
| 330 | + # [START bigquery_dataframes_type_system_json_query_array] |
| 331 | + import pandas as pd |
| 332 | + |
| 333 | + import bigframes.bigquery as bbq |
| 334 | + import bigframes.pandas as bpd |
| 335 | + |
| 336 | + fruits = [ |
| 337 | + '{"fruits": [{"name": "apple"}, {"name": "cherry"}]}', |
| 338 | + '{"fruits": [{"name": "guava"}, {"name": "grapes"}]}', |
| 339 | + ] |
| 340 | + |
| 341 | + json_s = bpd.Series(fruits, dtype="json") |
| 342 | + |
| 343 | + bbq.json_query_array(json_s, "$.fruits") |
| 344 | + # 0 ['{"name":"apple"}' '{"name":"cherry"}'] |
| 345 | + # 1 ['{"name":"guava"}' '{"name":"grapes"}'] |
| 346 | + # [END bigquery_dataframes_type_system_json_query_array] |
| 347 | + |
| 348 | + # Can't test literals due to format issues |
| 349 | + assert len(bbq.json_extract_array(json_s, "$.fruits")) == 2 |
0 commit comments