@@ -1435,3 +1435,73 @@ def test_coalesce(df):
14351435 assert result .column (0 ) == pa .array (
14361436 ["Hello" , "fallback" , "!" ], type = pa .string_view ()
14371437 )
1438+
1439+
def test_get_field(df):
    """Check that ``get_field`` reads individual fields back out of a struct.

    Columns "a" and "b" of the ``df`` fixture are packed into a struct column
    "s" (fields "x" and "y"), then each field is extracted again and compared
    with the values expected for the source columns.
    """
    # NOTE(review): the `df` fixture is defined elsewhere; the expected values
    # below assume "a" holds string_view data and "b" holds 4, 5, 6.
    struct_expr = f.named_struct([("x", column("a")), ("y", column("b"))])
    with_struct = df.with_column("s", struct_expr)

    x_expr = f.get_field(column("s"), string_literal("x")).alias("x_val")
    y_expr = f.get_field(column("s"), string_literal("y")).alias("y_val")
    batch = with_struct.select(x_expr, y_expr).collect()[0]

    expected_x = pa.array(["Hello", "World", "!"], type=pa.string_view())
    expected_y = pa.array([4, 5, 6])
    assert batch.column(0) == expected_x
    assert batch.column(1) == expected_y
1457+
1458+
def test_arrow_metadata(df):
    """Check that ``arrow_metadata`` produces a string-to-string map column."""
    meta_expr = f.arrow_metadata(column("a")).alias("meta")
    batch = df.select(meta_expr).collect()[0]

    # Only the column type is verified; the map contents (possibly empty)
    # are not inspected.
    expected_type = pa.map_(pa.utf8(), pa.utf8())
    assert batch.column(0).type == expected_type
1465+
1466+
def test_version():
    """Check that ``version`` returns a string mentioning Apache DataFusion."""
    ctx = SessionContext()
    # A one-row frame is enough: the test only needs a single evaluated value.
    single_row = ctx.from_pydict({"a": [1]})

    batch = single_row.select(f.version().alias("v")).collect()[0]
    reported = batch.column(0)[0].as_py()

    assert "Apache DataFusion" in reported
1473+
1474+
def test_row(df):
    """Check that ``row`` and ``struct`` produce identical output columns."""
    row_expr = f.row(column("a"), column("b")).alias("r")
    struct_expr = f.struct(column("a"), column("b")).alias("s")
    batch = df.select(row_expr, struct_expr).collect()[0]

    # row is expected to be an alias for struct, so both columns must match.
    from_row = batch.column(0)
    from_struct = batch.column(1)
    assert from_row == from_struct
1482+
1483+
def test_union_tag():
    """Check that ``union_tag`` reports the active field name of each union row."""
    # Dense union with three rows: int child[0], str child[0], int child[1].
    type_ids = pa.array([0, 1, 0], type=pa.int8())
    value_offsets = pa.array([0, 0, 1], type=pa.int32())
    child_arrays = [pa.array([1, 2]), pa.array(["hello"])]
    union_arr = pa.UnionArray.from_dense(
        type_ids, value_offsets, child_arrays, ["int", "str"], [0, 1]
    )
    input_batch = pa.RecordBatch.from_arrays([union_arr], names=["u"])

    ctx = SessionContext()
    df = ctx.create_dataframe([[input_batch]])

    tag_expr = f.union_tag(column("u")).alias("tag")
    tags = df.select(tag_expr).collect()[0].column(0)
    assert tags.to_pylist() == ["int", "str", "int"]
1494+
1495+
def test_union_extract():
    """Check that ``union_extract`` returns one field's values, null elsewhere."""
    # Dense union with three rows: int child[0], str child[0], int child[1].
    type_ids = pa.array([0, 1, 0], type=pa.int8())
    value_offsets = pa.array([0, 0, 1], type=pa.int32())
    child_arrays = [pa.array([1, 2]), pa.array(["hello"])]
    union_arr = pa.UnionArray.from_dense(
        type_ids, value_offsets, child_arrays, ["int", "str"], [0, 1]
    )
    input_batch = pa.RecordBatch.from_arrays([union_arr], names=["u"])

    ctx = SessionContext()
    df = ctx.create_dataframe([[input_batch]])

    extract_expr = f.union_extract(column("u"), string_literal("int")).alias("val")
    extracted = df.select(extract_expr).collect()[0].column(0)
    # The middle row holds the "str" variant, so extracting "int" gives None.
    assert extracted.to_pylist() == [1, None, 2]
0 commit comments