diff --git a/docs/source/user-guide/latest/compatibility.md b/docs/source/user-guide/latest/compatibility.md index 3ec6656187..e0bc5f06ee 100644 --- a/docs/source/user-guide/latest/compatibility.md +++ b/docs/source/user-guide/latest/compatibility.md @@ -153,13 +153,6 @@ Cast operations in Comet fall into three levels of support: Spark. - **N/A**: Spark does not support this cast. -### Negative Zero - -When casting floating-point values to strings, Spark normalizes negative zero (`-0.0`) to `"0.0"`, but Comet -may produce `"-0.0"`. Since negative zero and positive zero are semantically equivalent (`-0.0 == 0.0` is true -in IEEE 754), this difference is unlikely to affect real-world results. See -[#1036](https://github.com/apache/datafusion-comet/issues/1036) for more details. - ### Legacy Mode diff --git a/native/spark-expr/src/conversion_funcs/numeric.rs b/native/spark-expr/src/conversion_funcs/numeric.rs index 59a65fb49f..6166ea9281 100644 --- a/native/spark-expr/src/conversion_funcs/numeric.rs +++ b/native/spark-expr/src/conversion_funcs/numeric.rs @@ -165,7 +165,12 @@ macro_rules! cast_float_to_string { if value.abs() >= UPPER_SCIENTIFIC_BOUND || value.abs() < LOWER_SCIENTIFIC_BOUND => { - let formatted = format!("{value:E}"); + let formatted = if value.is_subnormal() { + // FIXME: this is not aligned with Java + format!("{value:.1E}") + } else { + format!("{value:E}") + }; if formatted.contains(".") { Ok(Some(formatted)) diff --git a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala index 2188f8e9af..ffe5299a36 100644 --- a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala +++ b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala @@ -236,12 +236,7 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim { Compatible() case DataTypes.DateType => Compatible() case DataTypes.TimestampType => Compatible() - case DataTypes.FloatType | DataTypes.DoubleType => - Compatible( - Some( - "There can be differences in precision. " + - "For example, the input \"1.4E-45\" will produce 1.0E-45 " + - "instead of 1.4E-45")) + case DataTypes.FloatType | DataTypes.DoubleType => Compatible() case _: DecimalType => // https://github.com/apache/datafusion-comet/issues/1068 Compatible( diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala index d1d00e3398..dff19d914c 100644 --- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala @@ -486,21 +486,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { } test("cast FloatType to StringType") { - // https://github.com/apache/datafusion-comet/issues/312 - val r = new Random(0) - val values = Seq( - Float.MaxValue, - Float.MinValue, - Float.NaN, - Float.PositiveInfinity, - Float.NegativeInfinity, - 1.0f, - -1.0f, - Short.MinValue.toFloat, - Short.MaxValue.toFloat, - 0.0f) ++ - Range(0, dataSize).map(_ => r.nextFloat()) - castTest(withNulls(values).toDF("a"), DataTypes.StringType) + castTest(generateFloats(), DataTypes.StringType) } test("cast FloatType to TimestampType") { @@ -550,17 +536,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { } test("cast DoubleType to StringType") { - // https://github.com/apache/datafusion-comet/issues/312 - val r = new Random(0) - val values = Seq( - Double.MaxValue, - Double.MinValue, - Double.NaN, - Double.PositiveInfinity, - Double.NegativeInfinity, - 0.0d) ++ - Range(0, dataSize).map(_ => r.nextDouble()) - castTest(withNulls(values).toDF("a"), DataTypes.StringType) + castTest(generateDoubles(), DataTypes.StringType) } test("cast DoubleType to TimestampType") { @@ -1594,7 +1570,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { withSQLConf((SQLConf.ANSI_ENABLED.key, "false")) { // cast() should return null for invalid inputs when ansi mode is disabled - val df = data.select(col("a"), col("a").cast(toType)).orderBy(col("a")) + val df = data.select(col("a"), col("a").cast(toType)) if (useDataFrameDiff) { assertDataFrameEqualsWithExceptions(df, assertCometNative = !hasIncompatibleType) } else { @@ -1609,7 +1585,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { data.createOrReplaceTempView("t") // try_cast() should always return null for invalid inputs // not using spark DSL since it `try_cast` is only available from Spark 4x - val df2 = spark.sql(s"select a, try_cast(a as ${toType.sql}) from t order by a") + val df2 = spark.sql(s"select a, try_cast(a as ${toType.sql}) from t") if (hasIncompatibleType) { checkSparkAnswer(df2) } else { @@ -1677,7 +1653,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { // try_cast() should always return null for invalid inputs if (testTry) { data.createOrReplaceTempView("t") - val df2 = spark.sql(s"select a, try_cast(a as ${toType.sql}) from t order by a") + val df2 = spark.sql(s"select a, try_cast(a as ${toType.sql}) from t") if (useDataFrameDiff) { assertDataFrameEqualsWithExceptions(df2, assertCometNative = !hasIncompatibleType) } else { diff --git a/spark/src/test/scala/org/apache/comet/DataGenerator.scala b/spark/src/test/scala/org/apache/comet/DataGenerator.scala index 443a058bca..81119cf7d5 100644 --- a/spark/src/test/scala/org/apache/comet/DataGenerator.scala +++ b/spark/src/test/scala/org/apache/comet/DataGenerator.scala @@ -61,6 +61,7 @@ class DataGenerator(r: Random) { Seq( Float.MaxValue, Float.MinPositiveValue, + Float.MinPositiveValue * 2, Float.MinValue, Float.NaN, Float.PositiveInfinity, @@ -69,20 +70,35 @@ class DataGenerator(r: Random) { -1.0f, Short.MinValue.toFloat, Short.MaxValue.toFloat, + 0.0f, + -0.0f, 0.0f) ++ - Range(0, n).map(_ => r.nextFloat()) + Range(0, n).map(_ => r.nextFloat()) ++ + Range(0, n).map{_ => + Float.MinPositiveValue + r.nextFloat() * (java.lang.Float.MIN_NORMAL - Float.MinPositiveValue) + } } def generateDoubles(n: Int): Seq[Double] = { Seq( Double.MaxValue, Double.MinPositiveValue, + Double.MinPositiveValue * 2, Double.MinValue, Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity, + 1.0d, + -1.0d, + Int.MinValue.toDouble, + Int.MaxValue.toDouble, + 0.0d, + -0.0d, 0.0d) ++ - Range(0, n).map(_ => r.nextDouble()) + Range(0, n).map(_ => r.nextDouble()) ++ + Range(0, n).map{_ => + Double.MinPositiveValue + r.nextDouble() * (java.lang.Double.MIN_NORMAL - Double.MinPositiveValue) + } } def generateBytes(n: Int): Seq[Byte] = {