Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/contributor-guide/expression-audits/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ map_funcs <map_funcs>
math_funcs <math_funcs>
misc_funcs <misc_funcs>
predicate_funcs <predicate_funcs>
string_funcs <string_funcs>
struct_funcs <struct_funcs>
url_funcs <url_funcs>
window_funcs <window_funcs>
Expand Down
253 changes: 253 additions & 0 deletions docs/source/contributor-guide/expression-audits/string_funcs.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,6 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim {
classOf[StringSplit] -> CometStringSplit,
classOf[StringTranslate] -> CometScalarFunction("translate"),
classOf[StringTrim] -> CometScalarFunction("trim"),
classOf[StringTrimBoth] -> CometScalarFunction("btrim"),
classOf[StringTrimLeft] -> CometScalarFunction("ltrim"),
classOf[StringTrimRight] -> CometScalarFunction("rtrim"),
classOf[Left] -> CometLeft,
Expand Down
23 changes: 13 additions & 10 deletions spark/src/main/scala/org/apache/comet/serde/strings.scala
Original file line number Diff line number Diff line change
Expand Up @@ -349,18 +349,22 @@ object CometRLike extends CometExpressionSerde[RLike] {
}
}

private object PadReasons {
val literalStrReason = "Scalar values are not supported for the `str` argument."
val nonLiteralPadReason = "Only scalar values are supported for the `pad` argument."
}

object CometStringRPad extends CometExpressionSerde[StringRPad] {

override def getUnsupportedReasons(): Seq[String] = Seq(
"Scalar values are not supported for the `str` argument." +
" Only scalar values are supported for the `pad` argument.")
override def getUnsupportedReasons(): Seq[String] =
Seq(PadReasons.literalStrReason, PadReasons.nonLiteralPadReason)

override def getSupportLevel(expr: StringRPad): SupportLevel = {
if (expr.str.isInstanceOf[Literal]) {
return Unsupported(Some("Scalar values are not supported for the str argument"))
return Unsupported(Some(PadReasons.literalStrReason))
}
if (!expr.pad.isInstanceOf[Literal]) {
return Unsupported(Some("Only scalar values are supported for the pad argument"))
return Unsupported(Some(PadReasons.nonLiteralPadReason))
}
Compatible()
}
Expand All @@ -380,16 +384,15 @@ object CometStringRPad extends CometExpressionSerde[StringRPad] {

object CometStringLPad extends CometExpressionSerde[StringLPad] {

override def getUnsupportedReasons(): Seq[String] = Seq(
"Scalar values are not supported for the `str` argument." +
" Only scalar values are supported for the `pad` argument.")
override def getUnsupportedReasons(): Seq[String] =
Seq(PadReasons.literalStrReason, PadReasons.nonLiteralPadReason)

override def getSupportLevel(expr: StringLPad): SupportLevel = {
if (expr.str.isInstanceOf[Literal]) {
return Unsupported(Some("Scalar values are not supported for the str argument"))
return Unsupported(Some(PadReasons.literalStrReason))
}
if (!expr.pad.isInstanceOf[Literal]) {
return Unsupported(Some("Only scalar values are supported for the pad argument"))
return Unsupported(Some(PadReasons.nonLiteralPadReason))
}
Compatible()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
-- specific language governing permissions and limitations
-- under the License.

-- Test lower() with case conversion enabled (happy path)
-- Config: spark.comet.caseConversion.enabled=true
-- Test lower() with the standard allowIncompatible opt-in (happy path)
-- Config: spark.comet.expression.Lower.allowIncompatible=true

statement
CREATE TABLE test_lower_enabled(s string) USING parquet
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ CREATE TABLE test_lpad(s string, len int, pad string) USING parquet
statement
INSERT INTO test_lpad VALUES ('hi', 5, 'x'), ('hello', 3, 'x'), ('hi', 5, 'xy'), ('', 3, 'a'), (NULL, 5, 'x'), ('hi', 0, 'x'), ('hi', -1, 'x')

query expect_fallback(Only scalar values are supported for the pad argument)
query expect_fallback(Only scalar values are supported for the `pad` argument)
SELECT lpad(s, len, pad) FROM test_lpad

query
Expand All @@ -32,5 +32,5 @@ query
SELECT lpad(s, 5, 'x') FROM test_lpad

-- literal + literal + literal
query expect_fallback(Scalar values are not supported for the str argument)
query expect_fallback(Scalar values are not supported for the `str` argument)
SELECT lpad('hi', 5, 'x'), lpad('hello', 3, 'x'), lpad('', 3, 'a'), lpad(NULL, 5, 'x')
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ CREATE TABLE test_rpad(s string, len int, pad string) USING parquet
statement
INSERT INTO test_rpad VALUES ('hi', 5, 'x'), ('hello', 3, 'x'), ('hi', 5, 'xy'), ('', 3, 'a'), (NULL, 5, 'x'), ('hi', 0, 'x'), ('hi', -1, 'x')

query expect_fallback(Only scalar values are supported for the pad argument)
query expect_fallback(Only scalar values are supported for the `pad` argument)
SELECT rpad(s, len, pad) FROM test_rpad

query
Expand All @@ -32,5 +32,5 @@ query
SELECT rpad(s, 5, 'x') FROM test_rpad

-- literal + literal + literal
query expect_fallback(Scalar values are not supported for the str argument)
query expect_fallback(Scalar values are not supported for the `str` argument)
SELECT rpad('hi', 5, 'x'), rpad('hello', 3, 'x'), rpad('', 3, 'a'), rpad(NULL, 5, 'x')
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
-- specific language governing permissions and limitations
-- under the License.

-- Test upper() with case conversion enabled (happy path)
-- Config: spark.comet.caseConversion.enabled=true
-- Test upper() with the standard allowIncompatible opt-in (happy path)
-- Config: spark.comet.expression.Upper.allowIncompatible=true

statement
CREATE TABLE test_upper_enabled(s string) USING parquet
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,11 @@ class CometStringExpressionSuite extends CometTestBase {
} else if (isLiteralStr) {
checkSparkAnswerAndFallbackReason(
sql,
"Scalar values are not supported for the str argument")
"Scalar values are not supported for the `str` argument")
} else if (!isLiteralPad) {
checkSparkAnswerAndFallbackReason(
sql,
"Only scalar values are supported for the pad argument")
"Only scalar values are supported for the `pad` argument")
} else {
checkSparkAnswerAndOperator(sql)
}
Expand Down Expand Up @@ -261,7 +261,9 @@ class CometStringExpressionSuite extends CometTestBase {
}

test("Upper and Lower") {
withSQLConf(CometConf.COMET_CASE_CONVERSION_ENABLED.key -> "true") {
withSQLConf(
CometConf.getExprAllowIncompatConfigKey("Upper") -> "true",
CometConf.getExprAllowIncompatConfigKey("Lower") -> "true") {
val table = "names"
withTable(table) {
sql(s"create table $table(id int, name varchar(20)) using parquet")
Expand Down Expand Up @@ -339,7 +341,7 @@ class CometStringExpressionSuite extends CometTestBase {
}

test("trim") {
withSQLConf(CometConf.COMET_CASE_CONVERSION_ENABLED.key -> "true") {
withSQLConf(CometConf.getExprAllowIncompatConfigKey("Upper") -> "true") {
val table = "test"
withTable(table) {
sql(s"create table $table(col varchar(20)) using parquet")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,10 @@ object CometStringExpressionBenchmark extends CometBenchmarkBase {
dir,
spark.sql(s"SELECT REPEAT(CAST(value AS STRING), 10) AS c1 FROM $tbl"))

val extraConfigs = Map(CometConf.COMET_CASE_CONVERSION_ENABLED.key -> "true")
val extraConfigs = Map(
CometConf.getExprAllowIncompatConfigKey("Upper") -> "true",
CometConf.getExprAllowIncompatConfigKey("Lower") -> "true",
CometConf.getExprAllowIncompatConfigKey("InitCap") -> "true")

stringExpressions.foreach { config =>
val allConfigs = extraConfigs ++ config.extraCometConfigs
Expand Down
Loading