Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions docs/source/user-guide/latest/compatibility.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,13 +153,6 @@ Cast operations in Comet fall into three levels of support:
Spark.
- **N/A**: Spark does not support this cast.

### Negative Zero

When casting floating-point values to strings, Spark normalizes negative zero (`-0.0`) to `"0.0"`, but Comet
may produce `"-0.0"`. Since negative zero and positive zero are semantically equivalent (`-0.0 == 0.0` is true
in IEEE 754), this difference is unlikely to affect real-world results. See
[#1036](https://github.com/apache/datafusion-comet/issues/1036) for more details.

### Legacy Mode

<!--BEGIN:CAST_LEGACY_TABLE-->
Expand Down
7 changes: 6 additions & 1 deletion native/spark-expr/src/conversion_funcs/numeric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,12 @@ macro_rules! cast_float_to_string {
if value.abs() >= UPPER_SCIENTIFIC_BOUND
|| value.abs() < LOWER_SCIENTIFIC_BOUND =>
{
let formatted = format!("{value:E}");
let formatted = if value.is_subnormal() {
// FIXME: this is not aligned with Java
format!("{value:.1E}")
} else {
format!("{value:E}")
};

if formatted.contains(".") {
Ok(Some(formatted))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,12 +236,7 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim {
Compatible()
case DataTypes.DateType => Compatible()
case DataTypes.TimestampType => Compatible()
case DataTypes.FloatType | DataTypes.DoubleType =>
Compatible(
Some(
"There can be differences in precision. " +
"For example, the input \"1.4E-45\" will produce 1.0E-45 " +
"instead of 1.4E-45"))
case DataTypes.FloatType | DataTypes.DoubleType => Compatible()
case _: DecimalType =>
// https://github.com/apache/datafusion-comet/issues/1068
Compatible(
Expand Down
34 changes: 5 additions & 29 deletions spark/src/test/scala/org/apache/comet/CometCastSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -486,21 +486,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
}

test("cast FloatType to StringType") {
// https://github.com/apache/datafusion-comet/issues/312
val r = new Random(0)
val values = Seq(
Float.MaxValue,
Float.MinValue,
Float.NaN,
Float.PositiveInfinity,
Float.NegativeInfinity,
1.0f,
-1.0f,
Short.MinValue.toFloat,
Short.MaxValue.toFloat,
0.0f) ++
Range(0, dataSize).map(_ => r.nextFloat())
castTest(withNulls(values).toDF("a"), DataTypes.StringType)
castTest(generateFloats(), DataTypes.StringType)
}

test("cast FloatType to TimestampType") {
Expand Down Expand Up @@ -550,17 +536,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
}

test("cast DoubleType to StringType") {
// https://github.com/apache/datafusion-comet/issues/312
val r = new Random(0)
val values = Seq(
Double.MaxValue,
Double.MinValue,
Double.NaN,
Double.PositiveInfinity,
Double.NegativeInfinity,
0.0d) ++
Range(0, dataSize).map(_ => r.nextDouble())
castTest(withNulls(values).toDF("a"), DataTypes.StringType)
castTest(generateDoubles(), DataTypes.StringType)
}

test("cast DoubleType to TimestampType") {
Expand Down Expand Up @@ -1594,7 +1570,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {

withSQLConf((SQLConf.ANSI_ENABLED.key, "false")) {
// cast() should return null for invalid inputs when ansi mode is disabled
val df = data.select(col("a"), col("a").cast(toType)).orderBy(col("a"))
val df = data.select(col("a"), col("a").cast(toType))
if (useDataFrameDiff) {
assertDataFrameEqualsWithExceptions(df, assertCometNative = !hasIncompatibleType)
} else {
Expand All @@ -1609,7 +1585,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
data.createOrReplaceTempView("t")
// try_cast() should always return null for invalid inputs
// not using spark DSL since it `try_cast` is only available from Spark 4x
val df2 = spark.sql(s"select a, try_cast(a as ${toType.sql}) from t order by a")
val df2 = spark.sql(s"select a, try_cast(a as ${toType.sql}) from t")
if (hasIncompatibleType) {
checkSparkAnswer(df2)
} else {
Expand Down Expand Up @@ -1677,7 +1653,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
// try_cast() should always return null for invalid inputs
if (testTry) {
data.createOrReplaceTempView("t")
val df2 = spark.sql(s"select a, try_cast(a as ${toType.sql}) from t order by a")
val df2 = spark.sql(s"select a, try_cast(a as ${toType.sql}) from t")
if (useDataFrameDiff) {
assertDataFrameEqualsWithExceptions(df2, assertCometNative = !hasIncompatibleType)
} else {
Expand Down
20 changes: 18 additions & 2 deletions spark/src/test/scala/org/apache/comet/DataGenerator.scala
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class DataGenerator(r: Random) {
Seq(
Float.MaxValue,
Float.MinPositiveValue,
Float.MinPositiveValue * 2,
Float.MinValue,
Float.NaN,
Float.PositiveInfinity,
Expand All @@ -69,20 +70,35 @@ class DataGenerator(r: Random) {
-1.0f,
Short.MinValue.toFloat,
Short.MaxValue.toFloat,
0.0f,
-0.0f,
0.0f) ++
Range(0, n).map(_ => r.nextFloat())
Range(0, n).map(_ => r.nextFloat()) ++
Range(0, n).map{_ =>
Float.MinPositiveValue + r.nextFloat() * (java.lang.Float.MIN_NORMAL - Float.MinPositiveValue)
}
}

def generateDoubles(n: Int): Seq[Double] = {
Seq(
Double.MaxValue,
Double.MinPositiveValue,
Double.MinPositiveValue * 2,
Double.MinValue,
Double.NaN,
Double.PositiveInfinity,
Double.NegativeInfinity,
1.0d,
-1.0d,
Int.MinValue.toDouble,
Int.MaxValue.toDouble,
0.0d,
-0.0d,
0.0d) ++
Range(0, n).map(_ => r.nextDouble())
Range(0, n).map(_ => r.nextDouble()) ++
Range(0, n).map{_ =>
Double.MinPositiveValue + r.nextDouble() * (java.lang.Double.MIN_NORMAL - Double.MinPositiveValue)
}
}

def generateBytes(n: Int): Seq[Byte] = {
Expand Down
Loading